{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.4883207070890415, "calibration/batch_distribution_entropy": 0.2739739421553503, "calibration/confidence_entropy": 0.21793248029268142, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4604362091577833, "calibration/mean_confidence": 0.9143221468537565, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018576388888888906, "completions/max_length": 3895.6, "completions/max_terminated_length": 3895.6, "completions/mean_length": 514.4408813476563, "completions/mean_terminated_length": 524.181884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.0081259123980999, "learning_rate": 5.952380952380953e-07, "loss": 0.0056, "num_tokens": 9040567.0, "reward": 0.4569155514240265, "reward_std": 0.41827074289321897, "rewards/accuracy_reward": 0.2575520783662796, "rewards/brier_reward": 0.30908964276313783, "rewards/confidence_uniqueness_reward": 0.28769826889038086, "rewards/format_reward": 0.5966145753860473, "rewards/frontier_coverage_0": 0.27184249460697174, "rewards/frontier_coverage_1": 0.27184249460697174, "rewards/frontier_coverage_10": 0.27184249460697174, "rewards/frontier_coverage_15": 0.27184249460697174, "rewards/frontier_coverage_20": 0.27184249460697174, "rewards/frontier_coverage_25": 0.27184249460697174, "rewards/frontier_coverage_5": 0.27184249460697174, "rewards/frontier_entropy_batch_reward": -0.5705800533294678, "signal/accuracy_reward/centered_abs_mean": 0.30725369453430174, "signal/accuracy_reward/group_std_mean": 0.3699012637138367, "signal/accuracy_reward/group_zero_std_frac": 0.07500000149011612, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3921299993991852, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15362684726715087, "signal/advantage_abs_mean": 0.848686158657074, "signal/advantage_pre_scale_abs_mean": 0.35823245644569396, "signal/advantage_pre_scale_std": 0.42261629104614257, "signal/advantage_std": 0.9842132687568664, "signal/brier_reward/centered_abs_mean": 0.3175659000873566, "signal/brier_reward/group_std_mean": 0.37283719182014463, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08106742650270463, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03175659067928791, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23240519165992737, "signal/confidence_uniqueness_reward/group_std_mean": 0.2853622674942017, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05935205966234207, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02324051931500435, "signal/format_reward/centered_abs_mean": 0.44161783456802367, "signal/format_reward/group_std_mean": 0.4756269872188568, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5638803482055664, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.22080891728401184, "signal/frontier_coverage_0/centered_abs_mean": 0.3085132300853729, "signal/frontier_coverage_0/group_std_mean": 0.36870989203453064, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011261269636452197, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004411739017814398, "signal/frontier_coverage_1/centered_abs_mean": 0.3085132300853729, "signal/frontier_coverage_1/group_std_mean": 0.36870989203453064, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011261269636452197, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004411739017814398, "signal/frontier_coverage_10/centered_abs_mean": 0.3085132300853729, "signal/frontier_coverage_10/group_std_mean": 0.36870989203453064, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011261269636452197, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004411739017814398, "signal/frontier_coverage_15/centered_abs_mean": 0.3085132300853729, "signal/frontier_coverage_15/group_std_mean": 0.36870989203453064, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011261269636452197, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004411739017814398, "signal/frontier_coverage_20/centered_abs_mean": 0.3085132300853729, "signal/frontier_coverage_20/group_std_mean": 0.36870989203453064, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011261269636452197, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004411739017814398, "signal/frontier_coverage_25/centered_abs_mean": 0.3085132300853729, "signal/frontier_coverage_25/group_std_mean": 0.36870989203453064, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.011261269636452197, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004411739017814398, "signal/frontier_coverage_5/centered_abs_mean": 0.3085132300853729, "signal/frontier_coverage_5/group_std_mean": 0.36870989203453064, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.011261269636452197, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004411739017814398, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4511567711830139, "signal/frontier_entropy_batch_reward/group_std_mean": 0.48259199857711793, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.1152160570025444, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.045115678757429126, "step": 5 }, { "calibration/aurc": 0.5107523789601409, "calibration/batch_distribution_entropy": 0.24915467457321486, "calibration/confidence_entropy": 0.21520335761112702, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4658758918099065, "calibration/mean_confidence": 0.922980520374389, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01883680555555558, "completions/max_length": 3971.0, "completions/max_terminated_length": 3971.0, "completions/mean_length": 469.39539794921876, "completions/mean_terminated_length": 478.6153076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 22.4, "epoch": 0.023999700003749954, "grad_norm": 0.05667172372341156, "learning_rate": 1.1904761904761906e-06, "loss": 0.0066, "num_tokens": 17530722.0, "reward": 0.5542843520641327, "reward_std": 0.3886994063854218, "rewards/accuracy_reward": 0.301215273141861, "rewards/brier_reward": 0.3664989948272705, "rewards/confidence_uniqueness_reward": 0.3651871979236603, "rewards/format_reward": 0.7378472208976745, "rewards/frontier_coverage_0": 0.3178509533405304, "rewards/frontier_coverage_1": 0.3178509533405304, "rewards/frontier_coverage_10": 0.3178509533405304, "rewards/frontier_coverage_15": 0.3178509533405304, "rewards/frontier_coverage_20": 0.3178509533405304, "rewards/frontier_coverage_25": 0.3178509533405304, "rewards/frontier_coverage_5": 0.3178509533405304, "rewards/frontier_entropy_batch_reward": -0.7023240089416504, "signal/accuracy_reward/centered_abs_mean": 0.3270073771476746, "signal/accuracy_reward/group_std_mean": 0.38548147678375244, "signal/accuracy_reward/group_zero_std_frac": 0.06666666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.4557223439216614, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1635036885738373, "signal/advantage_abs_mean": 0.8083321452140808, "signal/advantage_pre_scale_abs_mean": 0.32132325768470765, "signal/advantage_pre_scale_std": 0.392959201335907, "signal/advantage_std": 0.984190571308136, "signal/brier_reward/centered_abs_mean": 0.32042253017425537, "signal/brier_reward/group_std_mean": 0.3735620677471161, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08921760171651841, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.032042254135012625, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2168998122215271, "signal/confidence_uniqueness_reward/group_std_mean": 0.2739565551280975, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060243000835180284, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02168998159468174, "signal/format_reward/centered_abs_mean": 0.3335828959941864, "signal/format_reward/group_std_mean": 0.40515110492706297, "signal/format_reward/group_zero_std_frac": 0.00555555559694767, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.45944651365280154, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1667914479970932, "signal/frontier_coverage_0/centered_abs_mean": 0.3215973138809204, "signal/frontier_coverage_0/group_std_mean": 0.37801494002342223, "signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01281338632106781, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0045988415367901325, "signal/frontier_coverage_1/centered_abs_mean": 0.3215973138809204, "signal/frontier_coverage_1/group_std_mean": 0.37801494002342223, "signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01281338632106781, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0045988415367901325, "signal/frontier_coverage_10/centered_abs_mean": 0.3215973138809204, "signal/frontier_coverage_10/group_std_mean": 0.37801494002342223, "signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01281338632106781, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0045988415367901325, "signal/frontier_coverage_15/centered_abs_mean": 0.3215973138809204, "signal/frontier_coverage_15/group_std_mean": 0.37801494002342223, "signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01281338632106781, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0045988415367901325, "signal/frontier_coverage_20/centered_abs_mean": 0.3215973138809204, "signal/frontier_coverage_20/group_std_mean": 0.37801494002342223, "signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01281338632106781, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045988415367901325, "signal/frontier_coverage_25/centered_abs_mean": 0.3215973138809204, "signal/frontier_coverage_25/group_std_mean": 0.37801494002342223, "signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01281338632106781, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0045988415367901325, "signal/frontier_coverage_5/centered_abs_mean": 0.3215973138809204, "signal/frontier_coverage_5/group_std_mean": 0.37801494002342223, "signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01281338632106781, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0045988415367901325, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36690880060195924, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4326904654502869, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10138487070798874, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03669087961316109, "step": 10 }, { "calibration/aurc": 0.5439928649912942, "calibration/batch_distribution_entropy": 0.3275892998380167, "calibration/confidence_entropy": 0.26077720370647295, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4911188693830021, "calibration/mean_confidence": 0.9035915660059434, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333326, "completions/max_length": 3997.8, "completions/max_terminated_length": 3997.8, "completions/mean_length": 426.6123352050781, "completions/mean_terminated_length": 430.9161010742188, "completions/min_length": 0.0, "completions/min_terminated_length": 32.4, "epoch": 0.03599955000562493, "grad_norm": 0.017714520916342735, "learning_rate": 1.7857142857142859e-06, "loss": -0.0218, "num_tokens": 25547280.0, "reward": 0.6818203568458557, "reward_std": 0.2930204153060913, "rewards/accuracy_reward": 0.3302083373069763, "rewards/brier_reward": 0.44146730899810793, "rewards/confidence_uniqueness_reward": 0.5243119597434998, "rewards/format_reward": 0.9497395992279053, "rewards/frontier_coverage_0": 0.3611275374889374, "rewards/frontier_coverage_1": 0.3611275374889374, "rewards/frontier_coverage_10": 0.3611275374889374, "rewards/frontier_coverage_15": 0.3611275374889374, "rewards/frontier_coverage_20": 0.3611275374889374, "rewards/frontier_coverage_25": 0.3611275374889374, "rewards/frontier_coverage_5": 0.3611275374889374, "rewards/frontier_entropy_batch_reward": -0.9088038682937623, "signal/accuracy_reward/centered_abs_mean": 0.31500651240348815, "signal/accuracy_reward/group_std_mean": 0.37460089921951295, "signal/accuracy_reward/group_zero_std_frac": 0.09166666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6708433270454407, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15750325620174407, "signal/advantage_abs_mean": 0.769465982913971, "signal/advantage_pre_scale_abs_mean": 0.23625607192516326, "signal/advantage_pre_scale_std": 0.30175902843475344, "signal/advantage_std": 0.9840420842170715, "signal/brier_reward/centered_abs_mean": 0.2925845801830292, "signal/brier_reward/group_std_mean": 0.3448193073272705, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12465540021657943, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02925845831632614, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19031396508216858, "signal/confidence_uniqueness_reward/group_std_mean": 0.23621676564216615, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0817145824432373, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01903139688074589, "signal/format_reward/centered_abs_mean": 0.08739691749215125, "signal/format_reward/group_std_mean": 0.15843217223882675, "signal/format_reward/group_zero_std_frac": 0.3861111253499985, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17899880260229112, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.04369845874607563, "signal/frontier_coverage_0/centered_abs_mean": 0.30582007169723513, "signal/frontier_coverage_0/group_std_mean": 0.3615089595317841, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.018638250604271888, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004373226827010512, "signal/frontier_coverage_1/centered_abs_mean": 0.30582007169723513, "signal/frontier_coverage_1/group_std_mean": 0.3615089595317841, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.018638250604271888, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004373226827010512, "signal/frontier_coverage_10/centered_abs_mean": 0.30582007169723513, "signal/frontier_coverage_10/group_std_mean": 0.3615089595317841, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018638250604271888, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004373226827010512, "signal/frontier_coverage_15/centered_abs_mean": 0.30582007169723513, "signal/frontier_coverage_15/group_std_mean": 0.3615089595317841, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018638250604271888, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004373226827010512, "signal/frontier_coverage_20/centered_abs_mean": 0.30582007169723513, "signal/frontier_coverage_20/group_std_mean": 0.3615089595317841, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018638250604271888, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004373226827010512, "signal/frontier_coverage_25/centered_abs_mean": 0.30582007169723513, "signal/frontier_coverage_25/group_std_mean": 0.3615089595317841, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018638250604271888, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004373226827010512, "signal/frontier_coverage_5/centered_abs_mean": 0.30582007169723513, "signal/frontier_coverage_5/group_std_mean": 0.3615089595317841, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.018638250604271888, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004373226827010512, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15383070558309556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25853142738342283, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1277777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06458796337246894, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015383070893585682, "step": 15 }, { "calibration/aurc": 0.4556803994965267, "calibration/batch_distribution_entropy": 0.5350678484304228, "calibration/buffer_distribution_entropy": 0.3417969188964092, "calibration/confidence_entropy": 0.3897717372305213, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0433420365535248, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3313420910147438, "calibration/mean_confidence": 0.8364994562189102, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010156249999999978, "completions/max_length": 3824.2, "completions/max_terminated_length": 3824.2, "completions/mean_length": 465.0840270996094, "completions/mean_terminated_length": 469.90997924804685, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.04799940000749991, "grad_norm": 0.014085509814321995, "learning_rate": 2.380952380952381e-06, "loss": -0.0318, "num_tokens": 34018744.0, "reward": 0.7619543671607971, "reward_std": 0.23002811074256896, "rewards/accuracy_reward": 0.43802083730697633, "rewards/brier_reward": 0.5893322110176087, "rewards/confidence_uniqueness_reward": 0.653409230709076, "rewards/format_reward": 0.985850703716278, "rewards/frontier_coverage_0": 0.19372253511101006, "rewards/frontier_coverage_1": 0.19372253511101006, "rewards/frontier_coverage_10": 0.19372253511101006, "rewards/frontier_coverage_15": 0.19372253511101006, "rewards/frontier_coverage_20": 0.19372253511101006, "rewards/frontier_coverage_25": 0.19372253511101006, "rewards/frontier_coverage_5": 0.19372253511101006, "rewards/frontier_entropy_batch_reward": -0.9364717721939086, "signal/accuracy_reward/centered_abs_mean": 0.2878797709941864, "signal/accuracy_reward/group_std_mean": 0.35460472106933594, "signal/accuracy_reward/group_zero_std_frac": 0.09444444626569748, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9924409985542297, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1439398854970932, "signal/advantage_abs_mean": 0.7534375190734863, "signal/advantage_pre_scale_abs_mean": 0.1824011266231537, "signal/advantage_pre_scale_std": 0.23983034789562224, "signal/advantage_std": 0.9837870955467224, "signal/brier_reward/centered_abs_mean": 0.23643364608287812, "signal/brier_reward/group_std_mean": 0.2890482544898987, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1609581083059311, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023643364757299425, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15150568187236785, "signal/confidence_uniqueness_reward/group_std_mean": 0.18613037019968032, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09755237996578217, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015150568448007107, "signal/format_reward/centered_abs_mean": 0.02616644911468029, "signal/format_reward/group_std_mean": 0.058222611993551256, "signal/format_reward/group_zero_std_frac": 0.7305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08630450516939163, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013083224557340145, "signal/frontier_coverage_0/centered_abs_mean": 0.14219243675470353, "signal/frontier_coverage_0/group_std_mean": 0.18434092849493028, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012311214115470648, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002033351955469698, "signal/frontier_coverage_1/centered_abs_mean": 0.14219243675470353, "signal/frontier_coverage_1/group_std_mean": 0.18434092849493028, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012311214115470648, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002033351955469698, "signal/frontier_coverage_10/centered_abs_mean": 0.14219243675470353, "signal/frontier_coverage_10/group_std_mean": 0.18434092849493028, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012311214115470648, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002033351955469698, "signal/frontier_coverage_15/centered_abs_mean": 0.14219243675470353, "signal/frontier_coverage_15/group_std_mean": 0.18434092849493028, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012311214115470648, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002033351955469698, "signal/frontier_coverage_20/centered_abs_mean": 0.14219243675470353, "signal/frontier_coverage_20/group_std_mean": 0.18434092849493028, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012311214115470648, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002033351955469698, "signal/frontier_coverage_25/centered_abs_mean": 0.14219243675470353, "signal/frontier_coverage_25/group_std_mean": 0.18434092849493028, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012311214115470648, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002033351955469698, "signal/frontier_coverage_5/centered_abs_mean": 0.14219243675470353, "signal/frontier_coverage_5/group_std_mean": 0.18434092849493028, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012311214115470648, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002033351955469698, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11082728952169418, "signal/frontier_entropy_batch_reward/group_std_mean": 0.21301989257335663, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.25833334028720856, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.07670077979564667, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011082728952169418, "step": 20 }, { "calibration/aurc": 0.32297898002690983, "calibration/batch_distribution_entropy": 0.6393962661945383, "calibration/buffer_distribution_entropy": 0.475857881253323, "calibration/confidence_entropy": 0.5880384168326668, "calibration/coverage@0%": 0.00737848722179194, "calibration/coverage@1%": 0.00737848722179194, "calibration/coverage@10%": 0.021514612876242203, "calibration/coverage@15%": 0.053983590903203496, "calibration/coverage@20%": 0.21906205622592817, "calibration/coverage@25%": 0.3800217791924946, "calibration/coverage@30%": 0.4740219432344893, "calibration/coverage@5%": 0.00737848722179194, "calibration/ece": 0.13618263020369642, "calibration/mean_confidence": 0.6823366961993423, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0109375, "completions/max_length": 3916.0, "completions/max_terminated_length": 3916.0, "completions/mean_length": 551.5579833984375, "completions/mean_terminated_length": 557.6826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 100.2, "epoch": 0.05999925000937488, "grad_norm": 0.04915325343608856, "learning_rate": 2.9761904761904763e-06, "loss": -0.0291, "num_tokens": 43497140.0, "reward": 0.8319814205169678, "reward_std": 0.1857275605201721, "rewards/accuracy_reward": 0.5822916686534881, "rewards/brier_reward": 0.7326545953750611, "rewards/confidence_uniqueness_reward": 0.6608519792556763, "rewards/format_reward": 0.9865451455116272, "rewards/frontier_coverage_0": -0.014849835354834796, "rewards/frontier_coverage_1": -0.014849835354834796, "rewards/frontier_coverage_10": -0.014849835354834796, "rewards/frontier_coverage_15": -0.014849835354834796, "rewards/frontier_coverage_20": -0.014849835354834796, "rewards/frontier_coverage_25": -0.014849835354834796, "rewards/frontier_coverage_5": -0.014849835354834796, "rewards/frontier_entropy_batch_reward": -0.9030117869377137, "signal/accuracy_reward/centered_abs_mean": 0.24696180522441863, "signal/accuracy_reward/group_std_mean": 0.3145732879638672, "signal/accuracy_reward/group_zero_std_frac": 0.15555555671453475, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9943392634391784, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12348090261220931, "signal/advantage_abs_mean": 0.7143722534179687, "signal/advantage_pre_scale_abs_mean": 0.14123885333538055, "signal/advantage_pre_scale_std": 0.19914124310016632, "signal/advantage_std": 0.9836650371551514, "signal/brier_reward/centered_abs_mean": 0.1310290887951851, "signal/brier_reward/group_std_mean": 0.1702386736869812, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10493465960025787, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013102908991277218, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1781868025660515, "signal/confidence_uniqueness_reward/group_std_mean": 0.21007861495018004, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.14442408829927444, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01781868040561676, "signal/format_reward/centered_abs_mean": 0.02363823801279068, "signal/format_reward/group_std_mean": 0.04480181857943535, "signal/format_reward/group_zero_std_frac": 0.8166666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09280302375555038, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01181911900639534, "signal/frontier_coverage_0/centered_abs_mean": 0.08157578110694885, "signal/frontier_coverage_0/group_std_mean": 0.10721786618232727, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.009440916776657104, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011665337020531296, "signal/frontier_coverage_1/centered_abs_mean": 0.08157578110694885, "signal/frontier_coverage_1/group_std_mean": 0.10721786618232727, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.009440916776657104, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011665337020531296, "signal/frontier_coverage_10/centered_abs_mean": 0.08157578110694885, "signal/frontier_coverage_10/group_std_mean": 0.10721786618232727, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.009440916776657104, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011665337020531296, "signal/frontier_coverage_15/centered_abs_mean": 0.08157578110694885, "signal/frontier_coverage_15/group_std_mean": 0.10721786618232727, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.009440916776657104, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011665337020531296, "signal/frontier_coverage_20/centered_abs_mean": 0.08157578110694885, "signal/frontier_coverage_20/group_std_mean": 0.10721786618232727, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009440916776657104, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011665337020531296, "signal/frontier_coverage_25/centered_abs_mean": 0.08157578110694885, "signal/frontier_coverage_25/group_std_mean": 0.10721786618232727, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.009440916776657104, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011665337020531296, "signal/frontier_coverage_5/centered_abs_mean": 0.08157578110694885, "signal/frontier_coverage_5/group_std_mean": 0.10721786618232727, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.009440916776657104, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011665337020531296, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16028570830821992, "signal/frontier_entropy_batch_reward/group_std_mean": 0.27135405838489535, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.15833333898335694, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12926736772060393, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016028571128845214, "step": 25 }, { "calibration/aurc": 0.28283994378691124, "calibration/batch_distribution_entropy": 0.8098021020102317, "calibration/buffer_distribution_entropy": 0.6103832596029344, "calibration/confidence_entropy": 0.5821678574801844, "calibration/coverage@0%": 0.006513023807924029, "calibration/coverage@1%": 0.006513023807924029, "calibration/coverage@10%": 0.023857197249658447, "calibration/coverage@15%": 0.03540575368010464, "calibration/coverage@20%": 0.18533627087873336, "calibration/coverage@25%": 0.4079531033741429, "calibration/coverage@30%": 0.5827169258003846, "calibration/coverage@5%": 0.01681112678895384, "calibration/ece": 0.1065982830413967, "calibration/mean_confidence": 0.6392839596712477, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015885416666666673, "completions/max_length": 3756.2, "completions/max_terminated_length": 3756.2, "completions/mean_length": 629.6125, "completions/mean_terminated_length": 639.7435302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 173.6, "epoch": 0.07199910001124986, "grad_norm": 0.0025920316111296415, "learning_rate": 3.5714285714285718e-06, "loss": -0.0323, "num_tokens": 53860196.0, "reward": 0.8962351679801941, "reward_std": 0.18339000940322875, "rewards/accuracy_reward": 0.6081597208976746, "rewards/brier_reward": 0.7350787162780762, "rewards/confidence_uniqueness_reward": 0.8857907652854919, "rewards/format_reward": 0.9823784708976746, "rewards/frontier_coverage_0": -0.024395102635025978, "rewards/frontier_coverage_1": -0.024395102635025978, "rewards/frontier_coverage_10": -0.024395102635025978, "rewards/frontier_coverage_15": -0.024395102635025978, "rewards/frontier_coverage_20": -0.024395102635025978, "rewards/frontier_coverage_25": -0.024395102635025978, "rewards/frontier_coverage_5": -0.024395102635025978, "rewards/frontier_entropy_batch_reward": -0.5867892920970916, "signal/accuracy_reward/centered_abs_mean": 0.2335611939430237, "signal/accuracy_reward/group_std_mean": 0.29245399236679076, "signal/accuracy_reward/group_zero_std_frac": 0.23055555522441865, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.932023000717163, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11678059697151184, "signal/advantage_abs_mean": 0.7472962260246276, "signal/advantage_pre_scale_abs_mean": 0.1410281091928482, "signal/advantage_pre_scale_std": 0.2018715351819992, "signal/advantage_std": 0.9836687922477723, "signal/brier_reward/centered_abs_mean": 0.1558063119649887, "signal/brier_reward/group_std_mean": 0.19716133773326874, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12757135629653932, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015580631978809833, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07879094183444976, "signal/confidence_uniqueness_reward/group_std_mean": 0.10910149812698364, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060947873443365094, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00787909417413175, "signal/format_reward/centered_abs_mean": 0.03021375872194767, "signal/format_reward/group_std_mean": 0.056160366535186766, "signal/format_reward/group_zero_std_frac": 0.7750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12186049222946167, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015106879360973834, "signal/frontier_coverage_0/centered_abs_mean": 0.12939045429229737, "signal/frontier_coverage_0/group_std_mean": 0.17340194284915925, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015195189043879509, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018502835649996997, "signal/frontier_coverage_1/centered_abs_mean": 0.12939045429229737, "signal/frontier_coverage_1/group_std_mean": 0.17340194284915925, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015195189043879509, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018502835649996997, "signal/frontier_coverage_10/centered_abs_mean": 0.12939045429229737, "signal/frontier_coverage_10/group_std_mean": 0.17340194284915925, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015195189043879509, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018502835649996997, "signal/frontier_coverage_15/centered_abs_mean": 0.12939045429229737, "signal/frontier_coverage_15/group_std_mean": 0.17340194284915925, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015195189043879509, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018502835649996997, "signal/frontier_coverage_20/centered_abs_mean": 0.12939045429229737, "signal/frontier_coverage_20/group_std_mean": 0.17340194284915925, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015195189043879509, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018502835649996997, "signal/frontier_coverage_25/centered_abs_mean": 0.12939045429229737, "signal/frontier_coverage_25/group_std_mean": 0.17340194284915925, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015195189043879509, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018502835649996997, "signal/frontier_coverage_5/centered_abs_mean": 0.12939045429229737, "signal/frontier_coverage_5/group_std_mean": 0.17340194284915925, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015195189043879509, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018502835649996997, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4059493899345398, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4744054675102234, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.32940598130226134, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04059493914246559, "step": 30 }, { "calibration/aurc": 0.26349164441341744, "calibration/batch_distribution_entropy": 0.9722011088416853, "calibration/buffer_distribution_entropy": 0.7147490484587777, "calibration/confidence_entropy": 0.4716542473597789, "calibration/coverage@0%": 0.010033409527924178, "calibration/coverage@1%": 0.010033409527924178, "calibration/coverage@10%": 0.04262926925598449, "calibration/coverage@15%": 0.07320457015039024, "calibration/coverage@20%": 0.11014567619634501, "calibration/coverage@25%": 0.57302513015744, "calibration/coverage@30%": 0.8417191254787083, "calibration/coverage@5%": 0.010033409527924178, "calibration/ece": 0.2408688157230487, "calibration/mean_confidence": 0.5653689750040694, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02152777777777779, "completions/max_length": 3726.4, "completions/max_terminated_length": 3726.4, "completions/mean_length": 640.2627685546875, "completions/mean_terminated_length": 654.4486206054687, "completions/min_length": 0.0, "completions/min_terminated_length": 180.8, "epoch": 0.08399895001312484, "grad_norm": 0.00288687227293849, "learning_rate": 4.166666666666667e-06, "loss": -0.0519, "num_tokens": 64313463.0, "reward": 0.9324461936950683, "reward_std": 0.1782814681529999, "rewards/accuracy_reward": 0.6236111164093018, "rewards/brier_reward": 0.6862263441085815, "rewards/confidence_uniqueness_reward": 0.9285731554031372, "rewards/format_reward": 0.9761284589767456, "rewards/frontier_coverage_0": -0.047293629869818686, "rewards/frontier_coverage_1": -0.047293629869818686, "rewards/frontier_coverage_10": -0.047293629869818686, "rewards/frontier_coverage_15": -0.047293629869818686, "rewards/frontier_coverage_20": -0.047293629869818686, "rewards/frontier_coverage_25": -0.047293629869818686, "rewards/frontier_coverage_5": -0.047293629869818686, "rewards/frontier_entropy_batch_reward": -0.2416945517063141, "signal/accuracy_reward/centered_abs_mean": 0.2194552928209305, "signal/accuracy_reward/group_std_mean": 0.28137104511260985, "signal/accuracy_reward/group_zero_std_frac": 0.23055555522441865, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8508251547813416, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10972764641046524, "signal/advantage_abs_mean": 0.747930896282196, "signal/advantage_pre_scale_abs_mean": 0.13469484448432922, "signal/advantage_pre_scale_std": 0.19468034505844117, "signal/advantage_std": 0.983700430393219, "signal/brier_reward/centered_abs_mean": 0.2539998531341553, "signal/brier_reward/group_std_mean": 0.30160382986068723, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19768215715885162, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.025399985909461974, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.047507094591856, "signal/confidence_uniqueness_reward/group_std_mean": 0.08019827008247375, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03670351468026638, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004750709515064955, "signal/format_reward/centered_abs_mean": 0.0376356340944767, "signal/format_reward/group_std_mean": 0.06864920854568482, "signal/format_reward/group_zero_std_frac": 0.7277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.14402690380811692, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01881781704723835, "signal/frontier_coverage_0/centered_abs_mean": 0.2695829331874847, "signal/frontier_coverage_0/group_std_mean": 0.3519932210445404, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029944488778710365, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038550359196960924, "signal/frontier_coverage_1/centered_abs_mean": 0.2695829331874847, "signal/frontier_coverage_1/group_std_mean": 0.3519932210445404, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029944488778710365, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038550359196960924, "signal/frontier_coverage_10/centered_abs_mean": 0.2695829331874847, "signal/frontier_coverage_10/group_std_mean": 0.3519932210445404, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029944488778710365, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038550359196960924, "signal/frontier_coverage_15/centered_abs_mean": 0.2695829331874847, "signal/frontier_coverage_15/group_std_mean": 0.3519932210445404, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029944488778710365, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038550359196960924, "signal/frontier_coverage_20/centered_abs_mean": 0.2695829331874847, "signal/frontier_coverage_20/group_std_mean": 0.3519932210445404, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029944488778710365, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038550359196960924, "signal/frontier_coverage_25/centered_abs_mean": 0.2695829331874847, "signal/frontier_coverage_25/group_std_mean": 0.3519932210445404, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029944488778710365, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038550359196960924, "signal/frontier_coverage_5/centered_abs_mean": 0.2695829331874847, "signal/frontier_coverage_5/group_std_mean": 0.3519932210445404, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029944488778710365, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038550359196960924, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3358907103538513, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40937405824661255, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.26255030035972593, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033589070290327074, "step": 35 }, { "calibration/aurc": 0.2812089130282969, "calibration/batch_distribution_entropy": 0.9378820973722213, "calibration/buffer_distribution_entropy": 0.7788346026193121, "calibration/confidence_entropy": 0.5052747431569691, "calibration/coverage@0%": 0.011617889637609121, "calibration/coverage@1%": 0.011617889637609121, "calibration/coverage@10%": 0.02006814162348116, "calibration/coverage@15%": 0.04699503081949964, "calibration/coverage@20%": 0.21852156777167192, "calibration/coverage@25%": 0.3002575847416842, "calibration/coverage@30%": 0.6493828972901323, "calibration/coverage@5%": 0.011617889637609121, "calibration/ece": 0.18469349731283605, "calibration/mean_confidence": 0.6265628071196891, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017881944444444443, "completions/max_length": 3601.8, "completions/max_terminated_length": 3601.8, "completions/mean_length": 686.0500854492187, "completions/mean_terminated_length": 698.4910034179687, "completions/min_length": 0.0, "completions/min_terminated_length": 203.2, "epoch": 0.09599880001499982, "grad_norm": 0.0052315001375973225, "learning_rate": 4.761904761904762e-06, "loss": -0.0494, "num_tokens": 75336280.0, "reward": 0.9460026144981384, "reward_std": 0.16873830258846284, "rewards/accuracy_reward": 0.6516493201255799, "rewards/brier_reward": 0.7267241477966309, "rewards/confidence_uniqueness_reward": 0.9280878067016601, "rewards/format_reward": 0.9811632037162781, "rewards/frontier_coverage_0": -0.03641742318868637, "rewards/frontier_coverage_1": -0.03641742318868637, "rewards/frontier_coverage_10": -0.03641742318868637, "rewards/frontier_coverage_15": -0.03641742318868637, "rewards/frontier_coverage_20": -0.03641742318868637, "rewards/frontier_coverage_25": -0.03641742318868637, "rewards/frontier_coverage_5": -0.03641742318868637, "rewards/frontier_entropy_batch_reward": -0.3223945081233978, "signal/accuracy_reward/centered_abs_mean": 0.19728189706802368, "signal/accuracy_reward/group_std_mean": 0.257595032453537, "signal/accuracy_reward/group_zero_std_frac": 0.2861111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9428304195404053, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09864094853401184, "signal/advantage_abs_mean": 0.7440566301345826, "signal/advantage_pre_scale_abs_mean": 0.1264283075928688, "signal/advantage_pre_scale_std": 0.19226027727127076, "signal/advantage_std": 0.9835219383239746, "signal/brier_reward/centered_abs_mean": 0.2110671579837799, "signal/brier_reward/group_std_mean": 0.2574485570192337, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20208889842033387, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021106715872883798, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04523418918251991, "signal/confidence_uniqueness_reward/group_std_mean": 0.07450791597366332, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.043287652730941775, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045234191231429575, "signal/format_reward/centered_abs_mean": 0.03200412429869175, "signal/format_reward/group_std_mean": 0.05935907438397407, "signal/format_reward/group_zero_std_frac": 0.7611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.15341382324695588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016002062149345873, "signal/frontier_coverage_0/centered_abs_mean": 0.1926664799451828, "signal/frontier_coverage_0/group_std_mean": 0.26043030619621277, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02639569416642189, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002755130687728524, "signal/frontier_coverage_1/centered_abs_mean": 0.1926664799451828, "signal/frontier_coverage_1/group_std_mean": 0.26043030619621277, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02639569416642189, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002755130687728524, "signal/frontier_coverage_10/centered_abs_mean": 0.1926664799451828, "signal/frontier_coverage_10/group_std_mean": 0.26043030619621277, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02639569416642189, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002755130687728524, "signal/frontier_coverage_15/centered_abs_mean": 0.1926664799451828, "signal/frontier_coverage_15/group_std_mean": 0.26043030619621277, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02639569416642189, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002755130687728524, "signal/frontier_coverage_20/centered_abs_mean": 0.1926664799451828, "signal/frontier_coverage_20/group_std_mean": 0.26043030619621277, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02639569416642189, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002755130687728524, "signal/frontier_coverage_25/centered_abs_mean": 0.1926664799451828, "signal/frontier_coverage_25/group_std_mean": 0.26043030619621277, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02639569416642189, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002755130687728524, "signal/frontier_coverage_5/centered_abs_mean": 0.1926664799451828, "signal/frontier_coverage_5/group_std_mean": 0.26043030619621277, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02639569416642189, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002755130687728524, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38592681884765623, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4502368450164795, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.36914966702461244, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038592683523893355, "step": 40 }, { "calibration/aurc": 0.18182826496863658, "calibration/batch_distribution_entropy": 0.9293780132057007, "calibration/buffer_distribution_entropy": 0.8135788097220038, "calibration/confidence_entropy": 0.4995505573094066, "calibration/coverage@0%": 0.014873545187723889, "calibration/coverage@1%": 0.014873545187723889, "calibration/coverage@10%": 0.13937415970478578, "calibration/coverage@15%": 0.3561157115232597, "calibration/coverage@20%": 0.6580942309690091, "calibration/coverage@25%": 0.9673320588192078, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.05969262413509231, "calibration/ece": 0.16737471673810952, "calibration/mean_confidence": 0.626975023284362, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013888888888888905, "completions/max_length": 3784.0, "completions/max_terminated_length": 3784.0, "completions/mean_length": 739.9791625976562, "completions/mean_terminated_length": 750.4744018554687, "completions/min_length": 0.0, "completions/min_terminated_length": 252.6, "epoch": 0.1079986500168748, "grad_norm": 0.002621802967041731, "learning_rate": 4.909638554216868e-06, "loss": -0.0319, "num_tokens": 86996104.0, "reward": 0.9631305456161499, "reward_std": 0.1516349971294403, "rewards/accuracy_reward": 0.66796875, "rewards/brier_reward": 0.762067437171936, "rewards/confidence_uniqueness_reward": 0.9327586412429809, "rewards/format_reward": 0.98515625, "rewards/frontier_coverage_0": -0.008925668522715568, "rewards/frontier_coverage_1": -0.008925668522715568, "rewards/frontier_coverage_10": -0.008925668522715568, "rewards/frontier_coverage_15": -0.008925668522715568, "rewards/frontier_coverage_20": -0.008925668522715568, "rewards/frontier_coverage_25": -0.008925668522715568, "rewards/frontier_coverage_5": -0.008925668522715568, "rewards/frontier_entropy_batch_reward": -0.3202110558748245, "signal/accuracy_reward/centered_abs_mean": 0.18296983540058137, "signal/accuracy_reward/group_std_mean": 0.24478627741336823, "signal/accuracy_reward/group_zero_std_frac": 0.29722222983837127, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9449079632759094, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09148491770029069, "signal/advantage_abs_mean": 0.7456163048744202, "signal/advantage_pre_scale_abs_mean": 0.11277087926864623, "signal/advantage_pre_scale_std": 0.1716623306274414, "signal/advantage_std": 0.9834415912628174, "signal/brier_reward/centered_abs_mean": 0.19162435531616212, "signal/brier_reward/group_std_mean": 0.23700920343399048, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19831233322620392, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019162436202168464, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037688417732715605, "signal/confidence_uniqueness_reward/group_std_mean": 0.06137025505304337, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03864099867641926, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037688419222831728, "signal/format_reward/centered_abs_mean": 0.02428927905857563, "signal/format_reward/group_std_mean": 0.04575216062366962, "signal/format_reward/group_zero_std_frac": 0.8083333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1223247617483139, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012144639529287815, "signal/frontier_coverage_0/centered_abs_mean": 0.20233065783977508, "signal/frontier_coverage_0/group_std_mean": 0.2709280252456665, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029919801652431487, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028933283407241105, "signal/frontier_coverage_1/centered_abs_mean": 0.20233065783977508, "signal/frontier_coverage_1/group_std_mean": 0.2709280252456665, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029919801652431487, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028933283407241105, "signal/frontier_coverage_10/centered_abs_mean": 0.20233065783977508, "signal/frontier_coverage_10/group_std_mean": 0.2709280252456665, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029919801652431487, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028933283407241105, "signal/frontier_coverage_15/centered_abs_mean": 0.20233065783977508, "signal/frontier_coverage_15/group_std_mean": 0.2709280252456665, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029919801652431487, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028933283407241105, "signal/frontier_coverage_20/centered_abs_mean": 0.20233065783977508, "signal/frontier_coverage_20/group_std_mean": 0.2709280252456665, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029919801652431487, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028933283407241105, "signal/frontier_coverage_25/centered_abs_mean": 0.20233065783977508, "signal/frontier_coverage_25/group_std_mean": 0.2709280252456665, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029919801652431487, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028933283407241105, "signal/frontier_coverage_5/centered_abs_mean": 0.20233065783977508, "signal/frontier_coverage_5/group_std_mean": 0.2709280252456665, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029919801652431487, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028933283407241105, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37160670161247256, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43908803462982177, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38857935070991517, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03716067224740982, "step": 45 }, { "calibration/aurc": 0.355543148661393, "calibration/batch_distribution_entropy": 0.9560598149241851, "calibration/buffer_distribution_entropy": 0.8409339110002637, "calibration/confidence_entropy": 0.46758599740547824, "calibration/coverage@0%": 0.0110337334407132, "calibration/coverage@1%": 0.0110337334407132, "calibration/coverage@10%": 0.011561437926201327, "calibration/coverage@15%": 0.03893921379876839, "calibration/coverage@20%": 0.09381513646509912, "calibration/coverage@25%": 0.2094168255802026, "calibration/coverage@30%": 0.4347323189967276, "calibration/coverage@5%": 0.0110337334407132, "calibration/ece": 0.1621294484579252, "calibration/mean_confidence": 0.5582715356184847, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333348, "completions/max_length": 3273.0, "completions/max_terminated_length": 3273.0, "completions/mean_length": 710.214501953125, "completions/mean_terminated_length": 717.2854614257812, "completions/min_length": 0.0, "completions/min_terminated_length": 228.6, "epoch": 0.11999850001874976, "grad_norm": 0.003402084344998002, "learning_rate": 4.759036144578314e-06, "loss": -0.0296, "num_tokens": 98275375.0, "reward": 0.9638577103614807, "reward_std": 0.14062503576278687, "rewards/accuracy_reward": 0.6588541626930237, "rewards/brier_reward": 0.7617009282112122, "rewards/confidence_uniqueness_reward": 0.9369927644729614, "rewards/format_reward": 0.9899305582046509, "rewards/frontier_coverage_0": 0.007022621482610703, "rewards/frontier_coverage_1": 0.007022621482610703, "rewards/frontier_coverage_10": 0.007022621482610703, "rewards/frontier_coverage_15": 0.007022621482610703, "rewards/frontier_coverage_20": 0.007022621482610703, "rewards/frontier_coverage_25": 0.007022621482610703, "rewards/frontier_coverage_5": 0.007022621482610703, "rewards/frontier_entropy_batch_reward": -0.3110700786113739, "signal/accuracy_reward/centered_abs_mean": 0.1725911468267441, "signal/accuracy_reward/group_std_mean": 0.22528342604637147, "signal/accuracy_reward/group_zero_std_frac": 0.3611111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9741186976432801, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08629557341337205, "signal/advantage_abs_mean": 0.7553925156593323, "signal/advantage_pre_scale_abs_mean": 0.10563597530126571, "signal/advantage_pre_scale_std": 0.16184936761856078, "signal/advantage_std": 0.9833512544631958, "signal/brier_reward/centered_abs_mean": 0.18118281662464142, "signal/brier_reward/group_std_mean": 0.22774460315704345, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20505461990833282, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01811828128993511, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031979148462414744, "signal/confidence_uniqueness_reward/group_std_mean": 0.05420147180557251, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03622420057654381, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031979148741811516, "signal/format_reward/centered_abs_mean": 0.017957899160683154, "signal/format_reward/group_std_mean": 0.037631581723690036, "signal/format_reward/group_zero_std_frac": 0.830555546283722, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10187934935092927, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008978949580341577, "signal/frontier_coverage_0/centered_abs_mean": 0.21007080078125, "signal/frontier_coverage_0/group_std_mean": 0.27531993985176084, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034024206921458244, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030040125828236343, "signal/frontier_coverage_1/centered_abs_mean": 0.21007080078125, "signal/frontier_coverage_1/group_std_mean": 0.27531993985176084, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034024206921458244, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030040125828236343, "signal/frontier_coverage_10/centered_abs_mean": 0.21007080078125, "signal/frontier_coverage_10/group_std_mean": 0.27531993985176084, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.034024206921458244, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030040125828236343, "signal/frontier_coverage_15/centered_abs_mean": 0.21007080078125, "signal/frontier_coverage_15/group_std_mean": 0.27531993985176084, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.034024206921458244, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030040125828236343, "signal/frontier_coverage_20/centered_abs_mean": 0.21007080078125, "signal/frontier_coverage_20/group_std_mean": 0.27531993985176084, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034024206921458244, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030040125828236343, "signal/frontier_coverage_25/centered_abs_mean": 0.21007080078125, "signal/frontier_coverage_25/group_std_mean": 0.27531993985176084, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.034024206921458244, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030040125828236343, "signal/frontier_coverage_5/centered_abs_mean": 0.21007080078125, "signal/frontier_coverage_5/group_std_mean": 0.27531993985176084, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034024206921458244, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030040125828236343, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36411572694778443, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4315321445465088, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4120087444782257, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03641157373785973, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.18507757147473192, "eval_calibration/batch_distribution_entropy": 0.9183594624506147, "eval_calibration/buffer_distribution_entropy": 0.8576227601929546, "eval_calibration/confidence_entropy": 0.5061289053377749, "eval_calibration/coverage@0%": 0.17943548387096775, "eval_calibration/coverage@1%": 0.17943548387096775, "eval_calibration/coverage@10%": 0.3020833333333333, "eval_calibration/coverage@15%": 0.4437163978494623, "eval_calibration/coverage@20%": 0.6270161290322581, "eval_calibration/coverage@25%": 0.8429099462365591, "eval_calibration/coverage@30%": 0.9479166666666666, "eval_calibration/coverage@5%": 0.17943548387096775, "eval_calibration/ece": 0.232350307883931, "eval_calibration/mean_confidence": 0.5651359238441985, "eval_completions/clipped_ratio": 0.009375000000000003, "eval_completions/max_length": 2114.1666666666665, "eval_completions/max_terminated_length": 2114.1666666666665, "eval_completions/mean_length": 693.6932373046875, "eval_completions/mean_terminated_length": 700.2240702311198, "eval_completions/min_length": 72.33333333333333, "eval_completions/min_terminated_length": 265.6666666666667, "eval_loss": 0.0, "eval_num_tokens": 98275375.0, "eval_reward": 0.8912924925486246, "eval_reward_std": 0.23265416423479715, "eval_rewards/accuracy_reward": 0.6527777711550394, "eval_rewards/brier_reward": 0.7764979799588522, "eval_rewards/confidence_uniqueness_reward": 0.8910275399684906, "eval_rewards/format_reward": 0.9930555522441864, "eval_rewards/frontier_coverage_0": 0.009279087030639252, "eval_rewards/frontier_coverage_1": 0.009279087030639252, "eval_rewards/frontier_coverage_10": 0.009279087030639252, "eval_rewards/frontier_coverage_15": 0.009279087030639252, "eval_rewards/frontier_coverage_20": 0.009279087030639252, "eval_rewards/frontier_coverage_25": 0.009279087030639252, "eval_rewards/frontier_coverage_5": 0.009279087030639252, "eval_rewards/frontier_entropy_batch_reward": -0.9930555522441864, "eval_runtime": 173.0155, "eval_samples_per_second": 5.78, "eval_signal/accuracy_reward/centered_abs_mean": 0.4415147602558136, "eval_signal/accuracy_reward/group_std_mean": 0.4768268217643102, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9577702283859253, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2207573801279068, "eval_signal/advantage_abs_mean": 0.8893506626288096, "eval_signal/advantage_pre_scale_abs_mean": 0.2068815752863884, "eval_signal/advantage_pre_scale_std": 0.2305774266521136, "eval_signal/advantage_std": 0.9863962332407633, "eval_signal/brier_reward/centered_abs_mean": 0.19041885187228522, "eval_signal/brier_reward/group_std_mean": 0.2465388998389244, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08267416805028915, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019041885621845722, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047221081952253975, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07311302361389001, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02038925824066003, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004722108171942334, "eval_signal/format_reward/centered_abs_mean": 0.013454860852410397, "eval_signal/format_reward/group_std_mean": 0.03928370991100868, "eval_signal/format_reward/group_zero_std_frac": 0.7777777910232544, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.028587787101666134, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.006727430426205198, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.24869261930386224, "eval_signal/frontier_coverage_0/group_std_mean": 0.35074693461259204, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015456531352053085, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035563044948503375, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.24869261930386224, "eval_signal/frontier_coverage_1/group_std_mean": 0.35074693461259204, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015456531352053085, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035563044948503375, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.24869261930386224, "eval_signal/frontier_coverage_10/group_std_mean": 0.35074693461259204, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015456531352053085, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035563044948503375, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.24869261930386224, "eval_signal/frontier_coverage_15/group_std_mean": 0.35074693461259204, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015456531352053085, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035563044948503375, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.24869261930386224, "eval_signal/frontier_coverage_20/group_std_mean": 0.35074693461259204, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015456531352053085, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035563044948503375, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.24869261930386224, "eval_signal/frontier_coverage_25/group_std_mean": 0.35074693461259204, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015456531352053085, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035563044948503375, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.24869261930386224, "eval_signal/frontier_coverage_5/group_std_mean": 0.35074693461259204, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015456531352053085, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035563044948503375, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013454860852410397, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.03928370991100868, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777777910232544, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005717557234068711, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013454861667317648, "eval_steps_per_second": 0.035, "step": 50 }, { "calibration/aurc": 0.2572277962003569, "calibration/batch_distribution_entropy": 0.9547779041044873, "calibration/buffer_distribution_entropy": 0.8663927585115305, "calibration/confidence_entropy": 0.5224142559254161, "calibration/coverage@0%": 0.010124414417387751, "calibration/coverage@1%": 0.010124414417387751, "calibration/coverage@10%": 0.02617453887022441, "calibration/coverage@15%": 0.19295365729541794, "calibration/coverage@20%": 0.3283553771989877, "calibration/coverage@25%": 0.5176317108323689, "calibration/coverage@30%": 0.7306122575305988, "calibration/coverage@5%": 0.010124414417387751, "calibration/ece": 0.13155749677237122, "calibration/mean_confidence": 0.5880504501565289, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009461805555555581, "completions/max_length": 3255.2, "completions/max_terminated_length": 3255.2, "completions/mean_length": 727.8955688476562, "completions/mean_terminated_length": 734.931005859375, "completions/min_length": 0.0, "completions/min_terminated_length": 191.4, "epoch": 0.13199835002062474, "grad_norm": 0.0026916302740573883, "learning_rate": 4.60843373493976e-06, "loss": -0.0219, "num_tokens": 109741308.0, "reward": 0.9618936777114868, "reward_std": 0.13674592971801758, "rewards/accuracy_reward": 0.6470486044883728, "rewards/brier_reward": 0.7674099326133728, "rewards/confidence_uniqueness_reward": 0.9404424667358399, "rewards/format_reward": 0.9904513955116272, "rewards/frontier_coverage_0": 0.001309068128466606, "rewards/frontier_coverage_1": 0.001309068128466606, "rewards/frontier_coverage_10": 0.001309068128466606, "rewards/frontier_coverage_15": 0.001309068128466606, "rewards/frontier_coverage_20": 0.001309068128466606, "rewards/frontier_coverage_25": 0.001309068128466606, "rewards/frontier_coverage_5": 0.001309068128466606, "rewards/frontier_entropy_batch_reward": -0.2777259886264801, "signal/accuracy_reward/centered_abs_mean": 0.16701388657093047, "signal/accuracy_reward/group_std_mean": 0.2208912193775177, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9170358538627624, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08350694328546523, "signal/advantage_abs_mean": 0.7515268087387085, "signal/advantage_pre_scale_abs_mean": 0.10258873105049134, "signal/advantage_pre_scale_std": 0.15661307275295258, "signal/advantage_std": 0.9833774209022522, "signal/brier_reward/centered_abs_mean": 0.16898567974567413, "signal/brier_reward/group_std_mean": 0.21254501342773438, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18657754361629486, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016898567974567413, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02872337996959686, "signal/confidence_uniqueness_reward/group_std_mean": 0.047750599682331085, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03172791600227356, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028723380994051693, "signal/format_reward/centered_abs_mean": 0.016373698227107526, "signal/format_reward/group_std_mean": 0.032994627952575684, "signal/format_reward/group_zero_std_frac": 0.8583333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09072078242897988, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008186849113553763, "signal/frontier_coverage_0/centered_abs_mean": 0.19720979034900665, "signal/frontier_coverage_0/group_std_mean": 0.2574134826660156, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031110198795795442, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028200999833643435, "signal/frontier_coverage_1/centered_abs_mean": 0.19720979034900665, "signal/frontier_coverage_1/group_std_mean": 0.2574134826660156, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031110198795795442, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028200999833643435, "signal/frontier_coverage_10/centered_abs_mean": 0.19720979034900665, "signal/frontier_coverage_10/group_std_mean": 0.2574134826660156, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.031110198795795442, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028200999833643435, "signal/frontier_coverage_15/centered_abs_mean": 0.19720979034900665, "signal/frontier_coverage_15/group_std_mean": 0.2574134826660156, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.031110198795795442, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028200999833643435, "signal/frontier_coverage_20/centered_abs_mean": 0.19720979034900665, "signal/frontier_coverage_20/group_std_mean": 0.2574134826660156, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.031110198795795442, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028200999833643435, "signal/frontier_coverage_25/centered_abs_mean": 0.19720979034900665, "signal/frontier_coverage_25/group_std_mean": 0.2574134826660156, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031110198795795442, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028200999833643435, "signal/frontier_coverage_5/centered_abs_mean": 0.19720979034900665, "signal/frontier_coverage_5/group_std_mean": 0.2574134826660156, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031110198795795442, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028200999833643435, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3412093102931976, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41135616302490235, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3766399085521698, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034120932966470716, "step": 55 }, { "calibration/aurc": 0.27743387615755427, "calibration/batch_distribution_entropy": 0.9583523833618678, "calibration/buffer_distribution_entropy": 0.8804019885998695, "calibration/confidence_entropy": 0.466344529326517, "calibration/coverage@0%": 0.013651509291601752, "calibration/coverage@1%": 0.013651509291601752, "calibration/coverage@10%": 0.15538102293714737, "calibration/coverage@15%": 0.37334421356472086, "calibration/coverage@20%": 0.45284050566706346, "calibration/coverage@25%": 0.5141313051302937, "calibration/coverage@30%": 0.6727813439434129, "calibration/coverage@5%": 0.019410671595266674, "calibration/ece": 0.1604155313309958, "calibration/mean_confidence": 0.5627900217322679, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01449652777777779, "completions/max_length": 3593.0, "completions/max_terminated_length": 3593.0, "completions/mean_length": 727.8925415039063, "completions/mean_terminated_length": 738.712353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 185.8, "epoch": 0.14399820002249972, "grad_norm": 0.0024423820432275534, "learning_rate": 4.457831325301205e-06, "loss": -0.0326, "num_tokens": 121223206.0, "reward": 0.9519212007522583, "reward_std": 0.14070754647254943, "rewards/accuracy_reward": 0.6276041746139527, "rewards/brier_reward": 0.762453269958496, "rewards/confidence_uniqueness_reward": 0.9355636954307556, "rewards/format_reward": 0.9853298425674438, "rewards/frontier_coverage_0": 0.025408835709095003, "rewards/frontier_coverage_1": 0.025408835709095003, "rewards/frontier_coverage_10": 0.025408835709095003, "rewards/frontier_coverage_15": 0.025408835709095003, "rewards/frontier_coverage_20": 0.025408835709095003, "rewards/frontier_coverage_25": 0.025408835709095003, "rewards/frontier_coverage_5": 0.025408835709095003, "rewards/frontier_entropy_batch_reward": -0.2689092069864273, "signal/accuracy_reward/centered_abs_mean": 0.1701822906732559, "signal/accuracy_reward/group_std_mean": 0.2256343573331833, "signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9568945646286011, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08509114533662795, "signal/advantage_abs_mean": 0.7413867354393006, "signal/advantage_pre_scale_abs_mean": 0.10248211473226547, "signal/advantage_pre_scale_std": 0.16330770254135132, "signal/advantage_std": 0.9833378672599793, "signal/brier_reward/centered_abs_mean": 0.18149828016757966, "signal/brier_reward/group_std_mean": 0.22987159788608552, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20609477162361145, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018149828910827635, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03701090067625046, "signal/confidence_uniqueness_reward/group_std_mean": 0.06427684798836708, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04217044934630394, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037010901141911745, "signal/format_reward/centered_abs_mean": 0.02552625834941864, "signal/format_reward/group_std_mean": 0.05108080431818962, "signal/format_reward/group_zero_std_frac": 0.7805555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.14410489052534103, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01276312917470932, "signal/frontier_coverage_0/centered_abs_mean": 0.22716614007949829, "signal/frontier_coverage_0/group_std_mean": 0.2958860158920288, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03683609813451767, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003248475771397352, "signal/frontier_coverage_1/centered_abs_mean": 0.22716614007949829, "signal/frontier_coverage_1/group_std_mean": 0.2958860158920288, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03683609813451767, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003248475771397352, "signal/frontier_coverage_10/centered_abs_mean": 0.22716614007949829, "signal/frontier_coverage_10/group_std_mean": 0.2958860158920288, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03683609813451767, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003248475771397352, "signal/frontier_coverage_15/centered_abs_mean": 0.22716614007949829, "signal/frontier_coverage_15/group_std_mean": 0.2958860158920288, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03683609813451767, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003248475771397352, "signal/frontier_coverage_20/centered_abs_mean": 0.22716614007949829, "signal/frontier_coverage_20/group_std_mean": 0.2958860158920288, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03683609813451767, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003248475771397352, "signal/frontier_coverage_25/centered_abs_mean": 0.22716614007949829, "signal/frontier_coverage_25/group_std_mean": 0.2958860158920288, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03683609813451767, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003248475771397352, "signal/frontier_coverage_5/centered_abs_mean": 0.22716614007949829, "signal/frontier_coverage_5/group_std_mean": 0.2958860158920288, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03683609813451767, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003248475771397352, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3329376816749573, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4049929976463318, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3823388457298279, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03329376950860023, "step": 60 }, { "calibration/aurc": 0.24187817445696086, "calibration/batch_distribution_entropy": 0.9537951149289101, "calibration/buffer_distribution_entropy": 0.895752842577633, "calibration/confidence_entropy": 0.5066892649603131, "calibration/coverage@0%": 0.014859234868555388, "calibration/coverage@1%": 0.014859234868555388, "calibration/coverage@10%": 0.1557253084507572, "calibration/coverage@15%": 0.22767872436187964, "calibration/coverage@20%": 0.5952973900478373, "calibration/coverage@25%": 0.6492208361304835, "calibration/coverage@30%": 0.7082545960594742, "calibration/coverage@5%": 0.07725923486855539, "calibration/ece": 0.14400525368708186, "calibration/mean_confidence": 0.5951536858486229, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666652, "completions/max_length": 3382.8, "completions/max_terminated_length": 3382.8, "completions/mean_length": 631.187939453125, "completions/mean_terminated_length": 638.8400756835938, "completions/min_length": 0.0, "completions/min_terminated_length": 141.2, "epoch": 0.1559980500243747, "grad_norm": 0.0027344543486833572, "learning_rate": 4.307228915662651e-06, "loss": -0.0313, "num_tokens": 131588539.0, "reward": 0.9630724668502808, "reward_std": 0.13918745368719102, "rewards/accuracy_reward": 0.6561631917953491, "rewards/brier_reward": 0.7739776849746705, "rewards/confidence_uniqueness_reward": 0.9360240459442138, "rewards/format_reward": 0.9877604246139526, "rewards/frontier_coverage_0": 0.011109796725213528, "rewards/frontier_coverage_1": 0.011109796725213528, "rewards/frontier_coverage_10": 0.011109796725213528, "rewards/frontier_coverage_15": 0.011109796725213528, "rewards/frontier_coverage_20": 0.011109796725213528, "rewards/frontier_coverage_25": 0.011109796725213528, "rewards/frontier_coverage_5": 0.011109796725213528, "rewards/frontier_entropy_batch_reward": -0.31001612544059753, "signal/accuracy_reward/centered_abs_mean": 0.15845811367034912, "signal/accuracy_reward/group_std_mean": 0.21250716745853424, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9355194449424744, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07922905683517456, "signal/advantage_abs_mean": 0.7518455505371093, "signal/advantage_pre_scale_abs_mean": 0.10312150418758392, "signal/advantage_pre_scale_std": 0.1631181061267853, "signal/advantage_std": 0.9832925438880921, "signal/brier_reward/centered_abs_mean": 0.17200563251972198, "signal/brier_reward/group_std_mean": 0.21544553339481354, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20448561310768126, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017200562357902526, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03373164795339108, "signal/confidence_uniqueness_reward/group_std_mean": 0.05614056885242462, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04000279903411865, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033731648698449137, "signal/format_reward/centered_abs_mean": 0.02146809957921505, "signal/format_reward/group_std_mean": 0.04147007092833519, "signal/format_reward/group_zero_std_frac": 0.8277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12673527002334595, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010734049789607524, "signal/frontier_coverage_0/centered_abs_mean": 0.18637515604496002, "signal/frontier_coverage_0/group_std_mean": 0.24511989057064057, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03161940351128578, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026651647873222827, "signal/frontier_coverage_1/centered_abs_mean": 0.18637515604496002, "signal/frontier_coverage_1/group_std_mean": 0.24511989057064057, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03161940351128578, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026651647873222827, "signal/frontier_coverage_10/centered_abs_mean": 0.18637515604496002, "signal/frontier_coverage_10/group_std_mean": 0.24511989057064057, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03161940351128578, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026651647873222827, "signal/frontier_coverage_15/centered_abs_mean": 0.18637515604496002, "signal/frontier_coverage_15/group_std_mean": 0.24511989057064057, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03161940351128578, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026651647873222827, "signal/frontier_coverage_20/centered_abs_mean": 0.18637515604496002, "signal/frontier_coverage_20/group_std_mean": 0.24511989057064057, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03161940351128578, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026651647873222827, "signal/frontier_coverage_25/centered_abs_mean": 0.18637515604496002, "signal/frontier_coverage_25/group_std_mean": 0.24511989057064057, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03161940351128578, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026651647873222827, "signal/frontier_coverage_5/centered_abs_mean": 0.18637515604496002, "signal/frontier_coverage_5/group_std_mean": 0.24511989057064057, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03161940351128578, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026651647873222827, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35859541296958924, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42603600025177, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4278856158256531, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035859542340040206, "step": 65 }, { "calibration/aurc": 0.31177324033790227, "calibration/batch_distribution_entropy": 0.9377009466084179, "calibration/buffer_distribution_entropy": 0.9052739502005067, "calibration/confidence_entropy": 0.4595234811538427, "calibration/coverage@0%": 0.0125507308012776, "calibration/coverage@1%": 0.0125507308012776, "calibration/coverage@10%": 0.025171344836365316, "calibration/coverage@15%": 0.17121301150303198, "calibration/coverage@20%": 0.2777535816784706, "calibration/coverage@25%": 0.31907901969633845, "calibration/coverage@30%": 0.48218757232791737, "calibration/coverage@5%": 0.0125507308012776, "calibration/ece": 0.22044916226110992, "calibration/mean_confidence": 0.5375950511256701, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0066840277777777905, "completions/max_length": 3311.8, "completions/max_terminated_length": 3311.8, "completions/mean_length": 603.350341796875, "completions/mean_terminated_length": 607.4075073242187, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.16799790002624967, "grad_norm": 0.002717731287702918, "learning_rate": 4.156626506024097e-06, "loss": -0.0122, "num_tokens": 141617279.0, "reward": 0.9516151428222657, "reward_std": 0.12674596905708313, "rewards/accuracy_reward": 0.6421875, "rewards/brier_reward": 0.7418337464332581, "rewards/confidence_uniqueness_reward": 0.9369692325592041, "rewards/format_reward": 0.9933159828186036, "rewards/frontier_coverage_0": 0.0047592608723789455, "rewards/frontier_coverage_1": 0.0047592608723789455, "rewards/frontier_coverage_10": 0.0047592608723789455, "rewards/frontier_coverage_15": 0.0047592608723789455, "rewards/frontier_coverage_20": 0.0047592608723789455, "rewards/frontier_coverage_25": 0.0047592608723789455, "rewards/frontier_coverage_5": 0.0047592608723789455, "rewards/frontier_entropy_batch_reward": -0.34493361711502074, "signal/accuracy_reward/centered_abs_mean": 0.16208766996860505, "signal/accuracy_reward/group_std_mean": 0.2136603981256485, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9817174792289733, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08104383498430252, "signal/advantage_abs_mean": 0.7454643368721008, "signal/advantage_pre_scale_abs_mean": 0.09447728544473648, "signal/advantage_pre_scale_std": 0.14597638845443725, "signal/advantage_std": 0.9832653284072876, "signal/brier_reward/centered_abs_mean": 0.19506115317344666, "signal/brier_reward/group_std_mean": 0.24160505831241608, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23666558563709258, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019506115466356277, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02871289774775505, "signal/confidence_uniqueness_reward/group_std_mean": 0.045819585025310514, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0351563211530447, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002871289849281311, "signal/format_reward/centered_abs_mean": 0.01219075545668602, "signal/format_reward/group_std_mean": 0.026185811311006547, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07440270856022835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00609537772834301, "signal/frontier_coverage_0/centered_abs_mean": 0.2429557830095291, "signal/frontier_coverage_0/group_std_mean": 0.31135170757770536, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04189819991588593, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034742677584290505, "signal/frontier_coverage_1/centered_abs_mean": 0.2429557830095291, "signal/frontier_coverage_1/group_std_mean": 0.31135170757770536, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04189819991588593, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034742677584290505, "signal/frontier_coverage_10/centered_abs_mean": 0.2429557830095291, "signal/frontier_coverage_10/group_std_mean": 0.31135170757770536, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04189819991588593, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034742677584290505, "signal/frontier_coverage_15/centered_abs_mean": 0.2429557830095291, "signal/frontier_coverage_15/group_std_mean": 0.31135170757770536, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04189819991588593, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034742677584290505, "signal/frontier_coverage_20/centered_abs_mean": 0.2429557830095291, "signal/frontier_coverage_20/group_std_mean": 0.31135170757770536, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04189819991588593, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034742677584290505, "signal/frontier_coverage_25/centered_abs_mean": 0.2429557830095291, "signal/frontier_coverage_25/group_std_mean": 0.31135170757770536, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04189819991588593, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034742677584290505, "signal/frontier_coverage_5/centered_abs_mean": 0.2429557830095291, "signal/frontier_coverage_5/group_std_mean": 0.31135170757770536, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04189819991588593, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034742677584290505, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3646996796131134, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4328969597816467, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4460917890071869, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03646996915340424, "step": 70 }, { "calibration/aurc": 0.3101440805164673, "calibration/batch_distribution_entropy": 0.9050061498877229, "calibration/buffer_distribution_entropy": 0.9116176363911908, "calibration/confidence_entropy": 0.5082916114231081, "calibration/coverage@0%": 0.00576069634986306, "calibration/coverage@1%": 0.00576069634986306, "calibration/coverage@10%": 0.00576069634986306, "calibration/coverage@15%": 0.18960560085429345, "calibration/coverage@20%": 0.283362673405381, "calibration/coverage@25%": 0.4146926660345467, "calibration/coverage@30%": 0.41784227233375937, "calibration/coverage@5%": 0.00576069634986306, "calibration/ece": 0.197117675594798, "calibration/mean_confidence": 0.661174791007752, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004861111111111138, "completions/max_length": 2847.6, "completions/max_terminated_length": 2847.6, "completions/mean_length": 612.6124267578125, "completions/mean_terminated_length": 615.6407348632813, "completions/min_length": 0.0, "completions/min_terminated_length": 173.8, "epoch": 0.17999775002812465, "grad_norm": 0.0024783292319625616, "learning_rate": 4.006024096385543e-06, "loss": -0.0033, "num_tokens": 151739470.0, "reward": 0.966460108757019, "reward_std": 0.13642458617687225, "rewards/accuracy_reward": 0.6873263835906982, "rewards/brier_reward": 0.7391559958457947, "rewards/confidence_uniqueness_reward": 0.9389071345329285, "rewards/format_reward": 0.9947048544883728, "rewards/frontier_coverage_0": -0.0546910285949707, "rewards/frontier_coverage_1": -0.0546910285949707, "rewards/frontier_coverage_10": -0.0546910285949707, "rewards/frontier_coverage_15": -0.0546910285949707, "rewards/frontier_coverage_20": -0.0546910285949707, "rewards/frontier_coverage_25": -0.0546910285949707, "rewards/frontier_coverage_5": -0.0546910285949707, "rewards/frontier_entropy_batch_reward": -0.36887272596359255, "signal/accuracy_reward/centered_abs_mean": 0.15999349057674409, "signal/accuracy_reward/group_std_mean": 0.20950167179107665, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9744468212127686, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07999674528837204, "signal/advantage_abs_mean": 0.7634802103042603, "signal/advantage_pre_scale_abs_mean": 0.10433225780725479, "signal/advantage_pre_scale_std": 0.15965070724487304, "signal/advantage_std": 0.9832550525665283, "signal/brier_reward/centered_abs_mean": 0.19047823250293733, "signal/brier_reward/group_std_mean": 0.2352720856666565, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23357610106468202, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019047823548316956, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02479529082775116, "signal/confidence_uniqueness_reward/group_std_mean": 0.04085197448730469, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030193888396024705, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002479529147967696, "signal/format_reward/centered_abs_mean": 0.009879557183012366, "signal/format_reward/group_std_mean": 0.022797855362296105, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05883842520415783, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004939778591506183, "signal/frontier_coverage_0/centered_abs_mean": 0.16593956649303437, "signal/frontier_coverage_0/group_std_mean": 0.2201917886734009, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02901824899017811, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023729358334094288, "signal/frontier_coverage_1/centered_abs_mean": 0.16593956649303437, "signal/frontier_coverage_1/group_std_mean": 0.2201917886734009, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02901824899017811, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023729358334094288, "signal/frontier_coverage_10/centered_abs_mean": 0.16593956649303437, "signal/frontier_coverage_10/group_std_mean": 0.2201917886734009, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02901824899017811, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023729358334094288, "signal/frontier_coverage_15/centered_abs_mean": 0.16593956649303437, "signal/frontier_coverage_15/group_std_mean": 0.2201917886734009, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02901824899017811, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023729358334094288, "signal/frontier_coverage_20/centered_abs_mean": 0.16593956649303437, "signal/frontier_coverage_20/group_std_mean": 0.2201917886734009, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02901824899017811, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023729358334094288, "signal/frontier_coverage_25/centered_abs_mean": 0.16593956649303437, "signal/frontier_coverage_25/group_std_mean": 0.2201917886734009, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02901824899017811, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023729358334094288, "signal/frontier_coverage_5/centered_abs_mean": 0.16593956649303437, "signal/frontier_coverage_5/group_std_mean": 0.2201917886734009, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02901824899017811, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023729358334094288, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3872749865055084, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44775003790855405, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4765858590602875, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038727499544620514, "step": 75 }, { "calibration/aurc": 0.24882971050558633, "calibration/batch_distribution_entropy": 0.9163427844868991, "calibration/buffer_distribution_entropy": 0.9156692618801193, "calibration/confidence_entropy": 0.5399646911523172, "calibration/coverage@0%": 0.0041666666666666675, "calibration/coverage@1%": 0.0041666666666666675, "calibration/coverage@10%": 0.01832759186351706, "calibration/coverage@15%": 0.2557291666666667, "calibration/coverage@20%": 0.40364583333333337, "calibration/coverage@25%": 0.6344480340606008, "calibration/coverage@30%": 0.7081002920035939, "calibration/coverage@5%": 0.0041666666666666675, "calibration/ece": 0.1941888822566929, "calibration/mean_confidence": 0.6331814034577334, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00868055555555558, "completions/max_length": 3604.4, "completions/max_terminated_length": 3604.4, "completions/mean_length": 645.2096435546875, "completions/mean_terminated_length": 650.9556030273437, "completions/min_length": 0.0, "completions/min_terminated_length": 125.2, "epoch": 0.19199760002999963, "grad_norm": 0.002496064407750964, "learning_rate": 3.855421686746989e-06, "loss": -0.0084, "num_tokens": 162225565.0, "reward": 0.9505057215690613, "reward_std": 0.1393027275800705, "rewards/accuracy_reward": 0.6585069298744202, "rewards/brier_reward": 0.729674780368805, "rewards/confidence_uniqueness_reward": 0.9360662698745728, "rewards/format_reward": 0.9909722208976746, "rewards/frontier_coverage_0": -0.051046742522157726, "rewards/frontier_coverage_1": -0.051046742522157726, "rewards/frontier_coverage_10": -0.051046742522157726, "rewards/frontier_coverage_15": -0.051046742522157726, "rewards/frontier_coverage_20": -0.051046742522157726, "rewards/frontier_coverage_25": -0.051046742522157726, "rewards/frontier_coverage_5": -0.051046742522157726, "rewards/frontier_entropy_batch_reward": -0.35698198080062865, "signal/accuracy_reward/centered_abs_mean": 0.1613064229488373, "signal/accuracy_reward/group_std_mean": 0.21208280324935913, "signal/accuracy_reward/group_zero_std_frac": 0.40555556416511535, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9074809789657593, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08065321147441865, "signal/advantage_abs_mean": 0.7607282400131226, "signal/advantage_pre_scale_abs_mean": 0.10646351128816604, "signal/advantage_pre_scale_std": 0.16107785999774932, "signal/advantage_std": 0.9833507299423218, "signal/brier_reward/centered_abs_mean": 0.18236831128597258, "signal/brier_reward/group_std_mean": 0.22466041147708893, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20613384544849395, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018236831203103064, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026830673590302466, "signal/confidence_uniqueness_reward/group_std_mean": 0.04254492111504078, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030164846032857896, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002683067345060408, "signal/format_reward/centered_abs_mean": 0.013585069729015232, "signal/format_reward/group_std_mean": 0.026397685706615447, "signal/format_reward/group_zero_std_frac": 0.8833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07591437287628651, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006792534864507616, "signal/frontier_coverage_0/centered_abs_mean": 0.16189261376857758, "signal/frontier_coverage_0/group_std_mean": 0.21290515959262848, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02617349661886692, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023150643799453976, "signal/frontier_coverage_1/centered_abs_mean": 0.16189261376857758, "signal/frontier_coverage_1/group_std_mean": 0.21290515959262848, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02617349661886692, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023150643799453976, "signal/frontier_coverage_10/centered_abs_mean": 0.16189261376857758, "signal/frontier_coverage_10/group_std_mean": 0.21290515959262848, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02617349661886692, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023150643799453976, "signal/frontier_coverage_15/centered_abs_mean": 0.16189261376857758, "signal/frontier_coverage_15/group_std_mean": 0.21290515959262848, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02617349661886692, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023150643799453976, "signal/frontier_coverage_20/centered_abs_mean": 0.16189261376857758, "signal/frontier_coverage_20/group_std_mean": 0.21290515959262848, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02617349661886692, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023150643799453976, "signal/frontier_coverage_25/centered_abs_mean": 0.16189261376857758, "signal/frontier_coverage_25/group_std_mean": 0.21290515959262848, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02617349661886692, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023150643799453976, "signal/frontier_coverage_5/centered_abs_mean": 0.16189261376857758, "signal/frontier_coverage_5/group_std_mean": 0.21290515959262848, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02617349661886692, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023150643799453976, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37562611103057864, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4406170785427094, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4253277540206909, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03756261095404625, "step": 80 }, { "calibration/aurc": 0.3014076575162675, "calibration/batch_distribution_entropy": 0.9125055866858618, "calibration/buffer_distribution_entropy": 0.9188331792178456, "calibration/confidence_entropy": 0.5207189815765981, "calibration/coverage@0%": 0.00841005981688481, "calibration/coverage@1%": 0.00841005981688481, "calibration/coverage@10%": 0.00998486296649111, "calibration/coverage@15%": 0.014755210586203521, "calibration/coverage@20%": 0.032083614827450556, "calibration/coverage@25%": 0.3209187626504969, "calibration/coverage@30%": 0.607871104717331, "calibration/coverage@5%": 0.00841005981688481, "calibration/ece": 0.16054550562218886, "calibration/mean_confidence": 0.6530601834127058, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006944444444444442, "completions/max_length": 3695.8, "completions/max_terminated_length": 3695.8, "completions/mean_length": 648.8045166015625, "completions/mean_terminated_length": 653.3523803710938, "completions/min_length": 0.0, "completions/min_terminated_length": 149.6, "epoch": 0.2039974500318746, "grad_norm": 0.002358554396778345, "learning_rate": 3.7048192771084342e-06, "loss": -0.0216, "num_tokens": 172786993.0, "reward": 0.9661161661148071, "reward_std": 0.13587609827518463, "rewards/accuracy_reward": 0.6900173425674438, "rewards/brier_reward": 0.7510282516479492, "rewards/confidence_uniqueness_reward": 0.9363226532936096, "rewards/format_reward": 0.992881965637207, "rewards/frontier_coverage_0": -0.04805287569761276, "rewards/frontier_coverage_1": -0.04805287569761276, "rewards/frontier_coverage_10": -0.04805287569761276, "rewards/frontier_coverage_15": -0.04805287569761276, "rewards/frontier_coverage_20": -0.04805287569761276, "rewards/frontier_coverage_25": -0.04805287569761276, "rewards/frontier_coverage_5": -0.04805287569761276, "rewards/frontier_entropy_batch_reward": -0.39258493185043336, "signal/accuracy_reward/centered_abs_mean": 0.1508843332529068, "signal/accuracy_reward/group_std_mean": 0.20189307630062103, "signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9263910770416259, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0754421666264534, "signal/advantage_abs_mean": 0.7528648376464844, "signal/advantage_pre_scale_abs_mean": 0.10168863832950592, "signal/advantage_pre_scale_std": 0.1590551733970642, "signal/advantage_std": 0.9832520723342896, "signal/brier_reward/centered_abs_mean": 0.17436771094799042, "signal/brier_reward/group_std_mean": 0.2172168791294098, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21471179723739625, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017436770349740983, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02653498910367489, "signal/confidence_uniqueness_reward/group_std_mean": 0.045722561329603194, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032590895891189575, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026534989941865207, "signal/format_reward/centered_abs_mean": 0.013118489645421505, "signal/format_reward/group_std_mean": 0.029562078043818475, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08024730533361435, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0065592448227107525, "signal/frontier_coverage_0/centered_abs_mean": 0.14816038608551024, "signal/frontier_coverage_0/group_std_mean": 0.19712282717227936, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.026096120849251746, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002118693618103862, "signal/frontier_coverage_1/centered_abs_mean": 0.14816038608551024, "signal/frontier_coverage_1/group_std_mean": 0.19712282717227936, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.026096120849251746, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002118693618103862, "signal/frontier_coverage_10/centered_abs_mean": 0.14816038608551024, "signal/frontier_coverage_10/group_std_mean": 0.19712282717227936, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.026096120849251746, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002118693618103862, "signal/frontier_coverage_15/centered_abs_mean": 0.14816038608551024, "signal/frontier_coverage_15/group_std_mean": 0.19712282717227936, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026096120849251746, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002118693618103862, "signal/frontier_coverage_20/centered_abs_mean": 0.14816038608551024, "signal/frontier_coverage_20/group_std_mean": 0.19712282717227936, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026096120849251746, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002118693618103862, "signal/frontier_coverage_25/centered_abs_mean": 0.14816038608551024, "signal/frontier_coverage_25/group_std_mean": 0.19712282717227936, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026096120849251746, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002118693618103862, "signal/frontier_coverage_5/centered_abs_mean": 0.14816038608551024, "signal/frontier_coverage_5/group_std_mean": 0.19712282717227936, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026096120849251746, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002118693618103862, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3885017096996307, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44662662744522097, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4786907732486725, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03885017111897469, "step": 85 }, { "calibration/aurc": 0.2280682511148271, "calibration/batch_distribution_entropy": 0.9191445965224319, "calibration/buffer_distribution_entropy": 0.9203928575702902, "calibration/confidence_entropy": 0.5090574346056453, "calibration/coverage@0%": 0.00573603781882146, "calibration/coverage@1%": 0.00573603781882146, "calibration/coverage@10%": 0.055215204485488126, "calibration/coverage@15%": 0.3963610378188215, "calibration/coverage@20%": 0.5604235378188214, "calibration/coverage@25%": 0.6255277044854881, "calibration/coverage@30%": 0.7142562664907651, "calibration/coverage@5%": 0.01667353781882146, "calibration/ece": 0.1746891027457266, "calibration/mean_confidence": 0.652321831845221, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00512152777777779, "completions/max_length": 3058.2, "completions/max_terminated_length": 3058.2, "completions/mean_length": 614.4275146484375, "completions/mean_terminated_length": 617.5863159179687, "completions/min_length": 0.0, "completions/min_terminated_length": 145.4, "epoch": 0.2159973000337496, "grad_norm": 0.002601947635412216, "learning_rate": 3.5542168674698798e-06, "loss": -0.0059, "num_tokens": 182933870.0, "reward": 0.9689822554588318, "reward_std": 0.13325872272253036, "rewards/accuracy_reward": 0.684375, "rewards/brier_reward": 0.7553081393241883, "rewards/confidence_uniqueness_reward": 0.9397584915161132, "rewards/format_reward": 0.9948784708976746, "rewards/frontier_coverage_0": -0.039187131077051164, "rewards/frontier_coverage_1": -0.039187131077051164, "rewards/frontier_coverage_10": -0.039187131077051164, "rewards/frontier_coverage_15": -0.039187131077051164, "rewards/frontier_coverage_20": -0.039187131077051164, "rewards/frontier_coverage_25": -0.039187131077051164, "rewards/frontier_coverage_5": -0.039187131077051164, "rewards/frontier_entropy_batch_reward": -0.3622850239276886, "signal/accuracy_reward/centered_abs_mean": 0.1550998270511627, "signal/accuracy_reward/group_std_mean": 0.20906379520893098, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9399829387664795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07754991352558135, "signal/advantage_abs_mean": 0.7509470582008362, "signal/advantage_pre_scale_abs_mean": 0.10027577131986617, "signal/advantage_pre_scale_std": 0.15472148954868317, "signal/advantage_std": 0.9832675933837891, "signal/brier_reward/centered_abs_mean": 0.18083776235580445, "signal/brier_reward/group_std_mean": 0.22394680380821227, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21995324194431304, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018083777278661728, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024045027419924737, "signal/confidence_uniqueness_reward/group_std_mean": 0.03864929303526878, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029247282445430754, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002404502872377634, "signal/format_reward/centered_abs_mean": 0.009467230830341577, "signal/format_reward/group_std_mean": 0.02073230631649494, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05722929909825325, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004733615415170788, "signal/frontier_coverage_0/centered_abs_mean": 0.16228995025157927, "signal/frontier_coverage_0/group_std_mean": 0.21590131521224976, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02817150242626667, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023207463324069976, "signal/frontier_coverage_1/centered_abs_mean": 0.16228995025157927, "signal/frontier_coverage_1/group_std_mean": 0.21590131521224976, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02817150242626667, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023207463324069976, "signal/frontier_coverage_10/centered_abs_mean": 0.16228995025157927, "signal/frontier_coverage_10/group_std_mean": 0.21590131521224976, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02817150242626667, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023207463324069976, "signal/frontier_coverage_15/centered_abs_mean": 0.16228995025157927, "signal/frontier_coverage_15/group_std_mean": 0.21590131521224976, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02817150242626667, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023207463324069976, "signal/frontier_coverage_20/centered_abs_mean": 0.16228995025157927, "signal/frontier_coverage_20/group_std_mean": 0.21590131521224976, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02817150242626667, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023207463324069976, "signal/frontier_coverage_25/centered_abs_mean": 0.16228995025157927, "signal/frontier_coverage_25/group_std_mean": 0.21590131521224976, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02817150242626667, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023207463324069976, "signal/frontier_coverage_5/centered_abs_mean": 0.16228995025157927, "signal/frontier_coverage_5/group_std_mean": 0.21590131521224976, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02817150242626667, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023207463324069976, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38060142397880553, "signal/frontier_entropy_batch_reward/group_std_mean": 0.44278682470321656, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46376983523368837, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03806014358997345, "step": 90 }, { "calibration/aurc": 0.2656779618361436, "calibration/batch_distribution_entropy": 0.921673087913151, "calibration/buffer_distribution_entropy": 0.9221058711743622, "calibration/confidence_entropy": 0.5219745548320699, "calibration/coverage@0%": 0.003655373839760502, "calibration/coverage@1%": 0.003655373839760502, "calibration/coverage@10%": 0.10779749862161041, "calibration/coverage@15%": 0.37434690961637485, "calibration/coverage@20%": 0.4020928738397605, "calibration/coverage@25%": 0.5248110125353711, "calibration/coverage@30%": 0.6770166637048378, "calibration/coverage@5%": 0.003655373839760502, "calibration/ece": 0.1682618991312315, "calibration/mean_confidence": 0.6443340086406893, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3106.8, "completions/max_terminated_length": 3106.8, "completions/mean_length": 624.9882690429688, "completions/mean_terminated_length": 627.4920532226563, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.22799715003562457, "grad_norm": 0.0027543448377400637, "learning_rate": 3.4036144578313257e-06, "loss": -0.0072, "num_tokens": 193225415.0, "reward": 0.9632153868675232, "reward_std": 0.12627761960029601, "rewards/accuracy_reward": 0.6706597208976746, "rewards/brier_reward": 0.7587080836296082, "rewards/confidence_uniqueness_reward": 0.9400440454483032, "rewards/format_reward": 0.9953993082046508, "rewards/frontier_coverage_0": -0.027446018159389497, "rewards/frontier_coverage_1": -0.027446018159389497, "rewards/frontier_coverage_10": -0.027446018159389497, "rewards/frontier_coverage_15": -0.027446018159389497, "rewards/frontier_coverage_20": -0.027446018159389497, "rewards/frontier_coverage_25": -0.027446018159389497, "rewards/frontier_coverage_5": -0.027446018159389497, "rewards/frontier_entropy_batch_reward": -0.3694201588630676, "signal/accuracy_reward/centered_abs_mean": 0.13590494990348817, "signal/accuracy_reward/group_std_mean": 0.1884896844625473, "signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8468737006187439, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06795247495174409, "signal/advantage_abs_mean": 0.752357542514801, "signal/advantage_pre_scale_abs_mean": 0.09376581460237503, "signal/advantage_pre_scale_std": 0.1467900037765503, "signal/advantage_std": 0.983231246471405, "signal/brier_reward/centered_abs_mean": 0.1709260106086731, "signal/brier_reward/group_std_mean": 0.21340954005718232, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21400478780269622, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017092601954936983, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022889725863933563, "signal/confidence_uniqueness_reward/group_std_mean": 0.037352363020181654, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02867819517850876, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022889725398272274, "signal/format_reward/centered_abs_mean": 0.008599175233393907, "signal/format_reward/group_std_mean": 0.019867047667503357, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.053667180240154266, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004299587616696954, "signal/frontier_coverage_0/centered_abs_mean": 0.15505702197551727, "signal/frontier_coverage_0/group_std_mean": 0.2044772982597351, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.027795213833451272, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022173153702169657, "signal/frontier_coverage_1/centered_abs_mean": 0.15505702197551727, "signal/frontier_coverage_1/group_std_mean": 0.2044772982597351, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.027795213833451272, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022173153702169657, "signal/frontier_coverage_10/centered_abs_mean": 0.15505702197551727, "signal/frontier_coverage_10/group_std_mean": 0.2044772982597351, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027795213833451272, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022173153702169657, "signal/frontier_coverage_15/centered_abs_mean": 0.15505702197551727, "signal/frontier_coverage_15/group_std_mean": 0.2044772982597351, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027795213833451272, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022173153702169657, "signal/frontier_coverage_20/centered_abs_mean": 0.15505702197551727, "signal/frontier_coverage_20/group_std_mean": 0.2044772982597351, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027795213833451272, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022173153702169657, "signal/frontier_coverage_25/centered_abs_mean": 0.15505702197551727, "signal/frontier_coverage_25/group_std_mean": 0.2044772982597351, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027795213833451272, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022173153702169657, "signal/frontier_coverage_5/centered_abs_mean": 0.15505702197551727, "signal/frontier_coverage_5/group_std_mean": 0.2044772982597351, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.027795213833451272, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022173153702169657, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38333263993263245, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4431163430213928, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48075162172317504, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038333263248205185, "step": 95 }, { "calibration/aurc": 0.20759311463380917, "calibration/batch_distribution_entropy": 0.9477314132460055, "calibration/buffer_distribution_entropy": 0.9242834024494486, "calibration/confidence_entropy": 0.5270175050710435, "calibration/coverage@0%": 0.009675166218401008, "calibration/coverage@1%": 0.009675166218401008, "calibration/coverage@10%": 0.0938530992208724, "calibration/coverage@15%": 0.26268486014223075, "calibration/coverage@20%": 0.616002402139018, "calibration/coverage@25%": 0.7508021390374331, "calibration/coverage@30%": 0.9032085561497327, "calibration/coverage@5%": 0.009675166218401008, "calibration/ece": 0.1630102667152334, "calibration/mean_confidence": 0.6048623992059596, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025954861111111116, "completions/max_length": 3420.6, "completions/max_terminated_length": 3420.6, "completions/mean_length": 632.4453002929688, "completions/mean_terminated_length": 649.3080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 167.6, "epoch": 0.23999700003749952, "grad_norm": 0.002470463514328003, "learning_rate": 3.2530120481927713e-06, "loss": -0.0635, "num_tokens": 203610257.0, "reward": 0.9558441638946533, "reward_std": 0.1599712163209915, "rewards/accuracy_reward": 0.6660590291023254, "rewards/brier_reward": 0.7662190675735474, "rewards/confidence_uniqueness_reward": 0.9209245562553405, "rewards/format_reward": 0.9719617962837219, "rewards/frontier_coverage_0": -0.00996593926101923, "rewards/frontier_coverage_1": -0.00996593926101923, "rewards/frontier_coverage_10": -0.00996593926101923, "rewards/frontier_coverage_15": -0.00996593926101923, "rewards/frontier_coverage_20": -0.00996593926101923, "rewards/frontier_coverage_25": -0.00996593926101923, "rewards/frontier_coverage_5": -0.00996593926101923, "rewards/frontier_entropy_batch_reward": -0.3088305056095123, "signal/accuracy_reward/centered_abs_mean": 0.1613226979970932, "signal/accuracy_reward/group_std_mean": 0.2118624597787857, "signal/accuracy_reward/group_zero_std_frac": 0.397222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9061164379119873, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0806613489985466, "signal/advantage_abs_mean": 0.7433346390724183, "signal/advantage_pre_scale_abs_mean": 0.11672266870737076, "signal/advantage_pre_scale_std": 0.19095246195793153, "signal/advantage_std": 0.9833595633506775, "signal/brier_reward/centered_abs_mean": 0.17435405254364014, "signal/brier_reward/group_std_mean": 0.22024931907653808, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19577785432338715, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017435405775904654, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05343219414353371, "signal/confidence_uniqueness_reward/group_std_mean": 0.0928901955485344, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05993582606315613, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005343219451606274, "signal/format_reward/centered_abs_mean": 0.04528537318110466, "signal/format_reward/group_std_mean": 0.08404082655906678, "signal/format_reward/group_zero_std_frac": 0.6611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.25386848151683805, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02264268659055233, "signal/frontier_coverage_0/centered_abs_mean": 0.16983620524406434, "signal/frontier_coverage_0/group_std_mean": 0.22350181639194489, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02728012129664421, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024286577478051185, "signal/frontier_coverage_1/centered_abs_mean": 0.16983620524406434, "signal/frontier_coverage_1/group_std_mean": 0.22350181639194489, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02728012129664421, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024286577478051185, "signal/frontier_coverage_10/centered_abs_mean": 0.16983620524406434, "signal/frontier_coverage_10/group_std_mean": 0.22350181639194489, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02728012129664421, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024286577478051185, "signal/frontier_coverage_15/centered_abs_mean": 0.16983620524406434, "signal/frontier_coverage_15/group_std_mean": 0.22350181639194489, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02728012129664421, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024286577478051185, "signal/frontier_coverage_20/centered_abs_mean": 0.16983620524406434, "signal/frontier_coverage_20/group_std_mean": 0.22350181639194489, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02728012129664421, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024286577478051185, "signal/frontier_coverage_25/centered_abs_mean": 0.16983620524406434, "signal/frontier_coverage_25/group_std_mean": 0.22350181639194489, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02728012129664421, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024286577478051185, "signal/frontier_coverage_5/centered_abs_mean": 0.16983620524406434, "signal/frontier_coverage_5/group_std_mean": 0.22350181639194489, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02728012129664421, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024286577478051185, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3497317969799042, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4170763075351715, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.39279434084892273, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034973180294036864, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.163130081233984, "eval_calibration/batch_distribution_entropy": 0.9014365191445948, "eval_calibration/buffer_distribution_entropy": 0.9264574248539187, "eval_calibration/confidence_entropy": 0.5035177426390464, "eval_calibration/coverage@0%": 0.15947580645161288, "eval_calibration/coverage@1%": 0.15947580645161288, "eval_calibration/coverage@10%": 0.3776993727598566, "eval_calibration/coverage@15%": 0.4990255376344086, "eval_calibration/coverage@20%": 0.717909946236559, "eval_calibration/coverage@25%": 0.8870967741935484, "eval_calibration/coverage@30%": 0.9946236559139785, "eval_calibration/coverage@5%": 0.24801747311827957, "eval_calibration/ece": 0.272578609146035, "eval_calibration/mean_confidence": 0.6127355205524417, "eval_completions/clipped_ratio": 0.024131944444444442, "eval_completions/max_length": 2405.1666666666665, "eval_completions/max_terminated_length": 2405.1666666666665, "eval_completions/mean_length": 635.5255432128906, "eval_completions/mean_terminated_length": 651.1894124348959, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 199.0, "eval_loss": 0.0, "eval_num_tokens": 203610257.0, "eval_reward": 0.8911056915918986, "eval_reward_std": 0.2602160597840945, "eval_rewards/accuracy_reward": 0.6770833333333334, "eval_rewards/brier_reward": 0.7757821977138519, "eval_rewards/confidence_uniqueness_reward": 0.8653644323348999, "eval_rewards/format_reward": 0.9722222089767456, "eval_rewards/frontier_coverage_0": -0.004391265024120609, "eval_rewards/frontier_coverage_1": -0.004391265024120609, "eval_rewards/frontier_coverage_10": -0.004391265024120609, "eval_rewards/frontier_coverage_15": -0.004391265024120609, "eval_rewards/frontier_coverage_20": -0.004391265024120609, "eval_rewards/frontier_coverage_25": -0.004391265024120609, "eval_rewards/frontier_coverage_5": -0.004391265024120609, "eval_rewards/frontier_entropy_batch_reward": -0.9722222089767456, "eval_runtime": 207.7682, "eval_samples_per_second": 4.813, "eval_signal/accuracy_reward/centered_abs_mean": 0.4248046825329463, "eval_signal/accuracy_reward/group_std_mean": 0.4674356331427892, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8317528963088989, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21240234126647314, "eval_signal/advantage_abs_mean": 0.8401626845200857, "eval_signal/advantage_pre_scale_abs_mean": 0.21814856926600137, "eval_signal/advantage_pre_scale_std": 0.2586393654346466, "eval_signal/advantage_std": 0.9864379862944285, "eval_signal/brier_reward/centered_abs_mean": 0.21349711219469705, "eval_signal/brier_reward/group_std_mean": 0.27180638660987216, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0832139253616333, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021349711654086907, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0751443641881148, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.14167124529679617, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02908085659146309, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0075144364188114805, "eval_signal/format_reward/centered_abs_mean": 0.05284288184096416, "eval_signal/format_reward/group_std_mean": 0.1326932366937399, "eval_signal/format_reward/group_zero_std_frac": 0.3333333407839139, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.10094406145314376, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.02642144092048208, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.21956058591604233, "eval_signal/frontier_coverage_0/group_std_mean": 0.3250137319167455, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012314057908952236, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031397163790340223, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.21956058591604233, "eval_signal/frontier_coverage_1/group_std_mean": 0.3250137319167455, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012314057908952236, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031397163790340223, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.21956058591604233, "eval_signal/frontier_coverage_10/group_std_mean": 0.3250137319167455, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012314057908952236, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031397163790340223, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.21956058591604233, "eval_signal/frontier_coverage_15/group_std_mean": 0.3250137319167455, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012314057908952236, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031397163790340223, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.21956058591604233, "eval_signal/frontier_coverage_20/group_std_mean": 0.3250137319167455, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012314057908952236, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031397163790340223, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.21956058591604233, "eval_signal/frontier_coverage_25/group_std_mean": 0.3250137319167455, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012314057908952236, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031397163790340223, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.21956058591604233, "eval_signal/frontier_coverage_5/group_std_mean": 0.3250137319167455, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012314057908952236, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031397163790340223, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05284288184096416, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.1326932366937399, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3333333407839139, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02018881356343627, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005284288238423566, "eval_steps_per_second": 0.029, "step": 100 }, { "calibration/aurc": 0.27758447836015676, "calibration/batch_distribution_entropy": 0.9414206143630739, "calibration/buffer_distribution_entropy": 0.927855162724946, "calibration/confidence_entropy": 0.47734935606033496, "calibration/coverage@0%": 0.0075463989800637786, "calibration/coverage@1%": 0.0075463989800637786, "calibration/coverage@10%": 0.12135855367619637, "calibration/coverage@15%": 0.24880930719793426, "calibration/coverage@20%": 0.33652223634634276, "calibration/coverage@25%": 0.4669609459964935, "calibration/coverage@30%": 0.6008762480729914, "calibration/coverage@5%": 0.05174529400768809, "calibration/ece": 0.13926885015949225, "calibration/mean_confidence": 0.6254293779927222, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 3690.0, "completions/max_terminated_length": 3690.0, "completions/mean_length": 632.2384643554688, "completions/mean_terminated_length": 648.3413330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.2519968500393745, "grad_norm": 0.0030207051895558834, "learning_rate": 3.1024096385542172e-06, "loss": -0.07, "num_tokens": 213970508.0, "reward": 0.9623825907707214, "reward_std": 0.1516391783952713, "rewards/accuracy_reward": 0.6657986164093017, "rewards/brier_reward": 0.7901792764663697, "rewards/confidence_uniqueness_reward": 0.9224253416061401, "rewards/format_reward": 0.9745659708976746, "rewards/frontier_coverage_0": 0.02123640524223447, "rewards/frontier_coverage_1": 0.02123640524223447, "rewards/frontier_coverage_10": 0.02123640524223447, "rewards/frontier_coverage_15": 0.02123640524223447, "rewards/frontier_coverage_20": 0.02123640524223447, "rewards/frontier_coverage_25": 0.02123640524223447, "rewards/frontier_coverage_5": 0.02123640524223447, "rewards/frontier_entropy_batch_reward": -0.3118593841791153, "signal/accuracy_reward/centered_abs_mean": 0.15443793088197708, "signal/accuracy_reward/group_std_mean": 0.20904378294944764, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9191455960273742, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07721896544098854, "signal/advantage_abs_mean": 0.7209781050682068, "signal/advantage_pre_scale_abs_mean": 0.10629049986600876, "signal/advantage_pre_scale_std": 0.1804076611995697, "signal/advantage_std": 0.9832780361175537, "signal/brier_reward/centered_abs_mean": 0.15626430809497832, "signal/brier_reward/group_std_mean": 0.20239726901054383, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18771646320819854, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015626430884003638, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.052898465842008593, "signal/confidence_uniqueness_reward/group_std_mean": 0.09158898591995239, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06356689184904099, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005289846519008279, "signal/format_reward/centered_abs_mean": 0.04350586049258709, "signal/format_reward/group_std_mean": 0.08119002729654312, "signal/format_reward/group_zero_std_frac": 0.6722222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2607091456651688, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.021752930246293545, "signal/frontier_coverage_0/centered_abs_mean": 0.17601246535778045, "signal/frontier_coverage_0/group_std_mean": 0.23166741728782653, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030123594403266906, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025169781874865294, "signal/frontier_coverage_1/centered_abs_mean": 0.17601246535778045, "signal/frontier_coverage_1/group_std_mean": 0.23166741728782653, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030123594403266906, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025169781874865294, "signal/frontier_coverage_10/centered_abs_mean": 0.17601246535778045, "signal/frontier_coverage_10/group_std_mean": 0.23166741728782653, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030123594403266906, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025169781874865294, "signal/frontier_coverage_15/centered_abs_mean": 0.17601246535778045, "signal/frontier_coverage_15/group_std_mean": 0.23166741728782653, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030123594403266906, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025169781874865294, "signal/frontier_coverage_20/centered_abs_mean": 0.17601246535778045, "signal/frontier_coverage_20/group_std_mean": 0.23166741728782653, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030123594403266906, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025169781874865294, "signal/frontier_coverage_25/centered_abs_mean": 0.17601246535778045, "signal/frontier_coverage_25/group_std_mean": 0.23166741728782653, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030123594403266906, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025169781874865294, "signal/frontier_coverage_5/centered_abs_mean": 0.17601246535778045, "signal/frontier_coverage_5/group_std_mean": 0.23166741728782653, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030123594403266906, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025169781874865294, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.334915554523468, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4050456404685974, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40600050091743467, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033491555601358414, "step": 105 }, { "calibration/aurc": 0.1623917508484302, "calibration/batch_distribution_entropy": 0.9382471960574534, "calibration/buffer_distribution_entropy": 0.9295560400990392, "calibration/confidence_entropy": 0.48187896497077476, "calibration/coverage@0%": 0.029851500799161275, "calibration/coverage@1%": 0.029851500799161275, "calibration/coverage@10%": 0.4238398178142801, "calibration/coverage@15%": 0.5081844529158787, "calibration/coverage@20%": 0.6088912176070422, "calibration/coverage@25%": 0.8298052000748772, "calibration/coverage@30%": 0.9122503108164111, "calibration/coverage@5%": 0.22448740089895208, "calibration/ece": 0.1263346756856248, "calibration/mean_confidence": 0.6134829214978298, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666652, "completions/max_length": 3428.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 629.52822265625, "completions/mean_terminated_length": 636.1643432617187, "completions/min_length": 0.0, "completions/min_terminated_length": 141.8, "epoch": 0.2639967000412495, "grad_norm": 0.003066908335313201, "learning_rate": 2.9518072289156627e-06, "loss": -0.0232, "num_tokens": 224331121.0, "reward": 0.9897796273231506, "reward_std": 0.12779354751110078, "rewards/accuracy_reward": 0.7029513835906982, "rewards/brier_reward": 0.7993221998214721, "rewards/confidence_uniqueness_reward": 0.9374632716178894, "rewards/format_reward": 0.9893229126930236, "rewards/frontier_coverage_0": 0.0023610764765180647, "rewards/frontier_coverage_1": 0.0023610764765180647, "rewards/frontier_coverage_10": 0.0023610764765180647, "rewards/frontier_coverage_15": 0.0023610764765180647, "rewards/frontier_coverage_20": 0.0023610764765180647, "rewards/frontier_coverage_25": 0.0023610764765180647, "rewards/frontier_coverage_5": 0.0023610764765180647, "rewards/frontier_entropy_batch_reward": -0.3027245044708252, "signal/accuracy_reward/centered_abs_mean": 0.1584743946790695, "signal/accuracy_reward/group_std_mean": 0.20634441077709198, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0239338517189025, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07923719733953476, "signal/advantage_abs_mean": 0.7589234232902526, "signal/advantage_pre_scale_abs_mean": 0.0962674856185913, "signal/advantage_pre_scale_std": 0.1529387891292572, "signal/advantage_std": 0.9831875920295715, "signal/brier_reward/centered_abs_mean": 0.14430948197841645, "signal/brier_reward/group_std_mean": 0.1849027007818222, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1869402378797531, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014430948719382285, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03034689761698246, "signal/confidence_uniqueness_reward/group_std_mean": 0.05047857165336609, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03914179354906082, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030346899293363094, "signal/format_reward/centered_abs_mean": 0.017876519449055196, "signal/format_reward/group_std_mean": 0.03566114716231823, "signal/format_reward/group_zero_std_frac": 0.8444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11467134803533555, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008938259724527598, "signal/frontier_coverage_0/centered_abs_mean": 0.17844413220882416, "signal/frontier_coverage_0/group_std_mean": 0.23779484033584594, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03313328959047794, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002551751025021076, "signal/frontier_coverage_1/centered_abs_mean": 0.17844413220882416, "signal/frontier_coverage_1/group_std_mean": 0.23779484033584594, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03313328959047794, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002551751025021076, "signal/frontier_coverage_10/centered_abs_mean": 0.17844413220882416, "signal/frontier_coverage_10/group_std_mean": 0.23779484033584594, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03313328959047794, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002551751025021076, "signal/frontier_coverage_15/centered_abs_mean": 0.17844413220882416, "signal/frontier_coverage_15/group_std_mean": 0.23779484033584594, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03313328959047794, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002551751025021076, "signal/frontier_coverage_20/centered_abs_mean": 0.17844413220882416, "signal/frontier_coverage_20/group_std_mean": 0.23779484033584594, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03313328959047794, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002551751025021076, "signal/frontier_coverage_25/centered_abs_mean": 0.17844413220882416, "signal/frontier_coverage_25/group_std_mean": 0.23779484033584594, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03313328959047794, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002551751025021076, "signal/frontier_coverage_5/centered_abs_mean": 0.17844413220882416, "signal/frontier_coverage_5/group_std_mean": 0.23779484033584594, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03313328959047794, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002551751025021076, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.336598539352417, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40353216528892516, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4363815426826477, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03365985415875912, "step": 110 }, { "calibration/aurc": 0.2785963055836248, "calibration/batch_distribution_entropy": 0.9652125750310209, "calibration/buffer_distribution_entropy": 0.9330482648173744, "calibration/confidence_entropy": 0.49621660057634137, "calibration/coverage@0%": 0.007910071105482502, "calibration/coverage@1%": 0.007910071105482502, "calibration/coverage@10%": 0.029982246120544936, "calibration/coverage@15%": 0.1566908143448694, "calibration/coverage@20%": 0.3872066256148847, "calibration/coverage@25%": 0.582606519819888, "calibration/coverage@30%": 0.6653970694296458, "calibration/coverage@5%": 0.007910071105482502, "calibration/ece": 0.1682723224397847, "calibration/mean_confidence": 0.5473267088527768, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016145833333333325, "completions/max_length": 3463.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 619.2389770507813, "completions/mean_terminated_length": 629.3917236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 144.4, "epoch": 0.27599655004312446, "grad_norm": 0.002968505723401904, "learning_rate": 2.8012048192771087e-06, "loss": -0.0457, "num_tokens": 234543954.0, "reward": 0.9655173897743226, "reward_std": 0.13793158531188965, "rewards/accuracy_reward": 0.6552951335906982, "rewards/brier_reward": 0.7801418542861939, "rewards/confidence_uniqueness_reward": 0.9340383648872376, "rewards/format_reward": 0.9837673664093017, "rewards/frontier_coverage_0": 0.02393667958676815, "rewards/frontier_coverage_1": 0.02393667958676815, "rewards/frontier_coverage_10": 0.02393667958676815, "rewards/frontier_coverage_15": 0.02393667958676815, "rewards/frontier_coverage_20": 0.02393667958676815, "rewards/frontier_coverage_25": 0.02393667958676815, "rewards/frontier_coverage_5": 0.02393667958676815, "rewards/frontier_entropy_batch_reward": -0.27827951312065125, "signal/accuracy_reward/centered_abs_mean": 0.16129014790058135, "signal/accuracy_reward/group_std_mean": 0.20588865578174592, "signal/accuracy_reward/group_zero_std_frac": 0.4388889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9973422050476074, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08064507395029068, "signal/advantage_abs_mean": 0.7537703037261962, "signal/advantage_pre_scale_abs_mean": 0.10260143429040909, "signal/advantage_pre_scale_std": 0.16498699486255647, "signal/advantage_std": 0.983244001865387, "signal/brier_reward/centered_abs_mean": 0.16035984754562377, "signal/brier_reward/group_std_mean": 0.20447275638580323, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19879674315452575, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01603598427027464, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037899629771709444, "signal/confidence_uniqueness_reward/group_std_mean": 0.06762906014919282, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04686418101191521, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037899629678577185, "signal/format_reward/centered_abs_mean": 0.02791341170668602, "signal/format_reward/group_std_mean": 0.05625998750329018, "signal/format_reward/group_zero_std_frac": 0.7555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17200126945972444, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01395670585334301, "signal/frontier_coverage_0/centered_abs_mean": 0.2007855713367462, "signal/frontier_coverage_0/group_std_mean": 0.2609905391931534, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035559892654418945, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028712335973978043, "signal/frontier_coverage_1/centered_abs_mean": 0.2007855713367462, "signal/frontier_coverage_1/group_std_mean": 0.2609905391931534, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035559892654418945, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028712335973978043, "signal/frontier_coverage_10/centered_abs_mean": 0.2007855713367462, "signal/frontier_coverage_10/group_std_mean": 0.2609905391931534, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.035559892654418945, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028712335973978043, "signal/frontier_coverage_15/centered_abs_mean": 0.2007855713367462, "signal/frontier_coverage_15/group_std_mean": 0.2609905391931534, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.035559892654418945, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028712335973978043, "signal/frontier_coverage_20/centered_abs_mean": 0.2007855713367462, "signal/frontier_coverage_20/group_std_mean": 0.2609905391931534, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035559892654418945, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028712335973978043, "signal/frontier_coverage_25/centered_abs_mean": 0.2007855713367462, "signal/frontier_coverage_25/group_std_mean": 0.2609905391931534, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.035559892654418945, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028712335973978043, "signal/frontier_coverage_5/centered_abs_mean": 0.2007855713367462, "signal/frontier_coverage_5/group_std_mean": 0.2609905391931534, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035559892654418945, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028712335973978043, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.335198974609375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4032855689525604, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4162396967411041, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033519898727536204, "step": 115 }, { "calibration/aurc": 0.25985110868658695, "calibration/batch_distribution_entropy": 0.9631548287364671, "calibration/buffer_distribution_entropy": 0.9368432393792772, "calibration/confidence_entropy": 0.45159534427369225, "calibration/coverage@0%": 0.0010471275946903505, "calibration/coverage@1%": 0.0010471275946903505, "calibration/coverage@10%": 0.21385096429441958, "calibration/coverage@15%": 0.39416616560362666, "calibration/coverage@20%": 0.4899177481296418, "calibration/coverage@25%": 0.5565116507652067, "calibration/coverage@30%": 0.6357331500523827, "calibration/coverage@5%": 0.02151956853957224, "calibration/ece": 0.16906323787142602, "calibration/mean_confidence": 0.553027232437221, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333348, "completions/max_length": 3375.2, "completions/max_terminated_length": 3375.2, "completions/mean_length": 620.9748413085938, "completions/mean_terminated_length": 627.0546997070312, "completions/min_length": 0.0, "completions/min_terminated_length": 152.2, "epoch": 0.28799640004499943, "grad_norm": 0.003942957613617182, "learning_rate": 2.6506024096385547e-06, "loss": -0.0242, "num_tokens": 244779440.0, "reward": 0.9828472375869751, "reward_std": 0.1266437292098999, "rewards/accuracy_reward": 0.6784722208976746, "rewards/brier_reward": 0.7945773005485535, "rewards/confidence_uniqueness_reward": 0.9401492238044739, "rewards/format_reward": 0.9900173664093017, "rewards/frontier_coverage_0": 0.02642001286149025, "rewards/frontier_coverage_1": 0.02642001286149025, "rewards/frontier_coverage_10": 0.02642001286149025, "rewards/frontier_coverage_15": 0.02642001286149025, "rewards/frontier_coverage_20": 0.02642001286149025, "rewards/frontier_coverage_25": 0.02642001286149025, "rewards/frontier_coverage_5": 0.02642001286149025, "rewards/frontier_entropy_batch_reward": -0.2751484811306, "signal/accuracy_reward/centered_abs_mean": 0.15174696147441863, "signal/accuracy_reward/group_std_mean": 0.2080085426568985, "signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9443390369415283, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07587348073720931, "signal/advantage_abs_mean": 0.7379367828369141, "signal/advantage_pre_scale_abs_mean": 0.0910405844449997, "signal/advantage_pre_scale_std": 0.14757494032382965, "signal/advantage_std": 0.9832143902778625, "signal/brier_reward/centered_abs_mean": 0.16116996705532075, "signal/brier_reward/group_std_mean": 0.20735826790332795, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20363759398460388, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016116996854543687, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03047032840549946, "signal/confidence_uniqueness_reward/group_std_mean": 0.05480174720287323, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03763532117009163, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003047033119946718, "signal/format_reward/centered_abs_mean": 0.01847330704331398, "signal/format_reward/group_std_mean": 0.040765970945358276, "signal/format_reward/group_zero_std_frac": 0.8138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11124600917100906, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00923665352165699, "signal/frontier_coverage_0/centered_abs_mean": 0.20754149556159973, "signal/frontier_coverage_0/group_std_mean": 0.2732445240020752, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03756205141544342, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002967843320220709, "signal/frontier_coverage_1/centered_abs_mean": 0.20754149556159973, "signal/frontier_coverage_1/group_std_mean": 0.2732445240020752, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03756205141544342, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002967843320220709, "signal/frontier_coverage_10/centered_abs_mean": 0.20754149556159973, "signal/frontier_coverage_10/group_std_mean": 0.2732445240020752, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03756205141544342, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002967843320220709, "signal/frontier_coverage_15/centered_abs_mean": 0.20754149556159973, "signal/frontier_coverage_15/group_std_mean": 0.2732445240020752, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03756205141544342, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002967843320220709, "signal/frontier_coverage_20/centered_abs_mean": 0.20754149556159973, "signal/frontier_coverage_20/group_std_mean": 0.2732445240020752, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03756205141544342, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002967843320220709, "signal/frontier_coverage_25/centered_abs_mean": 0.20754149556159973, "signal/frontier_coverage_25/group_std_mean": 0.2732445240020752, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03756205141544342, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002967843320220709, "signal/frontier_coverage_5/centered_abs_mean": 0.20754149556159973, "signal/frontier_coverage_5/group_std_mean": 0.2732445240020752, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03756205141544342, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002967843320220709, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3402287781238556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41207742094993594, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4308716356754303, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0340228796005249, "step": 120 }, { "calibration/aurc": 0.1702944249457136, "calibration/batch_distribution_entropy": 0.9474281075428994, "calibration/buffer_distribution_entropy": 0.9399565674518694, "calibration/confidence_entropy": 0.4981072863512715, "calibration/coverage@0%": 0.10222342219869385, "calibration/coverage@1%": 0.18714101358250584, "calibration/coverage@10%": 0.352484518804438, "calibration/coverage@15%": 0.38542244882575055, "calibration/coverage@20%": 0.5395064520329215, "calibration/coverage@25%": 0.7593703528573289, "calibration/coverage@30%": 0.8751834871815959, "calibration/coverage@5%": 0.306036520545082, "calibration/ece": 0.16359838991861636, "calibration/mean_confidence": 0.5928311777908328, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005468750000000022, "completions/max_length": 2949.2, "completions/max_terminated_length": 2949.2, "completions/mean_length": 635.7155395507813, "completions/mean_terminated_length": 639.208154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 139.6, "epoch": 0.2999962500468744, "grad_norm": 0.0034135030582547188, "learning_rate": 2.5e-06, "loss": -0.0155, "num_tokens": 255220547.0, "reward": 0.9929954290390015, "reward_std": 0.12329381704330444, "rewards/accuracy_reward": 0.6967013955116272, "rewards/brier_reward": 0.8013178825378418, "rewards/confidence_uniqueness_reward": 0.9443058967590332, "rewards/format_reward": 0.9944444298744202, "rewards/frontier_coverage_0": 0.01054713288322091, "rewards/frontier_coverage_1": 0.01054713288322091, "rewards/frontier_coverage_10": 0.01054713288322091, "rewards/frontier_coverage_15": 0.01054713288322091, "rewards/frontier_coverage_20": 0.01054713288322091, "rewards/frontier_coverage_25": 0.01054713288322091, "rewards/frontier_coverage_5": 0.01054713288322091, "rewards/frontier_entropy_batch_reward": -0.2819568753242493, "signal/accuracy_reward/centered_abs_mean": 0.15746527910232544, "signal/accuracy_reward/group_std_mean": 0.2058452069759369, "signal/accuracy_reward/group_zero_std_frac": 0.4194444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0113989472389222, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07873263955116272, "signal/advantage_abs_mean": 0.7568188905715942, "signal/advantage_pre_scale_abs_mean": 0.0927268460392952, "signal/advantage_pre_scale_std": 0.14396594166755677, "signal/advantage_std": 0.9831969380378723, "signal/brier_reward/centered_abs_mean": 0.14777444005012513, "signal/brier_reward/group_std_mean": 0.18979325294494628, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18979544341564178, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014777444303035736, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022845935076475143, "signal/confidence_uniqueness_reward/group_std_mean": 0.03979781419038773, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029593577980995177, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002284593554213643, "signal/format_reward/centered_abs_mean": 0.0103624127805233, "signal/format_reward/group_std_mean": 0.024810751900076866, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0675680547952652, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00518120639026165, "signal/frontier_coverage_0/centered_abs_mean": 0.19016571938991547, "signal/frontier_coverage_0/group_std_mean": 0.24880056381225585, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0349818117916584, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002719369810074568, "signal/frontier_coverage_1/centered_abs_mean": 0.19016571938991547, "signal/frontier_coverage_1/group_std_mean": 0.24880056381225585, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0349818117916584, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002719369810074568, "signal/frontier_coverage_10/centered_abs_mean": 0.19016571938991547, "signal/frontier_coverage_10/group_std_mean": 0.24880056381225585, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0349818117916584, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002719369810074568, "signal/frontier_coverage_15/centered_abs_mean": 0.19016571938991547, "signal/frontier_coverage_15/group_std_mean": 0.24880056381225585, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0349818117916584, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002719369810074568, "signal/frontier_coverage_20/centered_abs_mean": 0.19016571938991547, "signal/frontier_coverage_20/group_std_mean": 0.24880056381225585, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0349818117916584, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002719369810074568, "signal/frontier_coverage_25/centered_abs_mean": 0.19016571938991547, "signal/frontier_coverage_25/group_std_mean": 0.24880056381225585, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0349818117916584, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002719369810074568, "signal/frontier_coverage_5/centered_abs_mean": 0.19016571938991547, "signal/frontier_coverage_5/group_std_mean": 0.24880056381225585, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0349818117916584, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002719369810074568, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3356120824813843, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4034553825855255, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4321089446544647, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03356120809912681, "step": 125 }, { "calibration/aurc": 0.21974228957333825, "calibration/batch_distribution_entropy": 0.957767837624923, "calibration/buffer_distribution_entropy": 0.9414282291526312, "calibration/confidence_entropy": 0.4740419511530939, "calibration/coverage@0%": 0.005249582744674368, "calibration/coverage@1%": 0.005249582744674368, "calibration/coverage@10%": 0.22161801602834014, "calibration/coverage@15%": 0.2912255509209624, "calibration/coverage@20%": 0.5404524177537375, "calibration/coverage@25%": 0.7363437705207907, "calibration/coverage@30%": 0.8552729630880902, "calibration/coverage@5%": 0.052246971778616924, "calibration/ece": 0.13019231823487484, "calibration/mean_confidence": 0.5658447997080991, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009722222222222233, "completions/max_length": 3547.8, "completions/max_terminated_length": 3547.8, "completions/mean_length": 649.3093017578125, "completions/mean_terminated_length": 655.8386352539062, "completions/min_length": 0.0, "completions/min_terminated_length": 140.2, "epoch": 0.3119961000487494, "grad_norm": 0.0037228513974696398, "learning_rate": 2.349397590361446e-06, "loss": -0.0148, "num_tokens": 265825390.0, "reward": 0.9755982637405396, "reward_std": 0.13257428556680678, "rewards/accuracy_reward": 0.6615451335906982, "rewards/brier_reward": 0.7983024001121521, "rewards/confidence_uniqueness_reward": 0.9400404095649719, "rewards/format_reward": 0.9899305462837219, "rewards/frontier_coverage_0": 0.036238094815053044, "rewards/frontier_coverage_1": 0.036238094815053044, "rewards/frontier_coverage_10": 0.036238094815053044, "rewards/frontier_coverage_15": 0.036238094815053044, "rewards/frontier_coverage_20": 0.036238094815053044, "rewards/frontier_coverage_25": 0.036238094815053044, "rewards/frontier_coverage_5": 0.036238094815053044, "rewards/frontier_entropy_batch_reward": -0.2760128676891327, "signal/accuracy_reward/centered_abs_mean": 0.18161349892616271, "signal/accuracy_reward/group_std_mean": 0.23401132524013518, "signal/accuracy_reward/group_zero_std_frac": 0.3555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.1416666626930236, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09080674946308136, "signal/advantage_abs_mean": 0.764301085472107, "signal/advantage_pre_scale_abs_mean": 0.10157442539930343, "signal/advantage_pre_scale_std": 0.1548332154750824, "signal/advantage_std": 0.9832197904586792, "signal/brier_reward/centered_abs_mean": 0.1567206412553787, "signal/brier_reward/group_std_mean": 0.1994690865278244, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1979391247034073, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01567206475883722, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02809174992144108, "signal/confidence_uniqueness_reward/group_std_mean": 0.04275588467717171, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03521875329315662, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028091749642044305, "signal/format_reward/centered_abs_mean": 0.015679253730922937, "signal/format_reward/group_std_mean": 0.027723340317606926, "signal/format_reward/group_zero_std_frac": 0.8888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09744075834751129, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007839626865461469, "signal/frontier_coverage_0/centered_abs_mean": 0.2045228362083435, "signal/frontier_coverage_0/group_std_mean": 0.2676228523254395, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0369983471930027, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002924676425755024, "signal/frontier_coverage_1/centered_abs_mean": 0.2045228362083435, "signal/frontier_coverage_1/group_std_mean": 0.2676228523254395, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0369983471930027, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002924676425755024, "signal/frontier_coverage_10/centered_abs_mean": 0.2045228362083435, "signal/frontier_coverage_10/group_std_mean": 0.2676228523254395, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0369983471930027, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002924676425755024, "signal/frontier_coverage_15/centered_abs_mean": 0.2045228362083435, "signal/frontier_coverage_15/group_std_mean": 0.2676228523254395, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0369983471930027, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002924676425755024, "signal/frontier_coverage_20/centered_abs_mean": 0.2045228362083435, "signal/frontier_coverage_20/group_std_mean": 0.2676228523254395, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0369983471930027, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002924676425755024, "signal/frontier_coverage_25/centered_abs_mean": 0.2045228362083435, "signal/frontier_coverage_25/group_std_mean": 0.2676228523254395, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0369983471930027, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002924676425755024, "signal/frontier_coverage_5/centered_abs_mean": 0.2045228362083435, "signal/frontier_coverage_5/group_std_mean": 0.2676228523254395, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0369983471930027, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002924676425755024, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3297392189502716, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39750961065292356, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4163591504096985, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032973920553922655, "step": 130 }, { "calibration/aurc": 0.22207504778154225, "calibration/batch_distribution_entropy": 0.9701411645819956, "calibration/buffer_distribution_entropy": 0.9441236363940959, "calibration/confidence_entropy": 0.4802381286994982, "calibration/coverage@0%": 0.013629843240210753, "calibration/coverage@1%": 0.013629843240210753, "calibration/coverage@10%": 0.2837484478954534, "calibration/coverage@15%": 0.3598894524847338, "calibration/coverage@20%": 0.5369423730219428, "calibration/coverage@25%": 0.6182801199086677, "calibration/coverage@30%": 0.6674144997591183, "calibration/coverage@5%": 0.2120673432402108, "calibration/ece": 0.15988758741196968, "calibration/mean_confidence": 0.5410395064628674, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009027777777777768, "completions/max_length": 3522.0, "completions/max_terminated_length": 3522.0, "completions/mean_length": 615.496728515625, "completions/mean_terminated_length": 621.16328125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.8, "epoch": 0.32399595005062437, "grad_norm": 0.0039030034095048904, "learning_rate": 2.1987951807228917e-06, "loss": -0.0258, "num_tokens": 276008936.0, "reward": 0.9833725333213806, "reward_std": 0.13102127313613893, "rewards/accuracy_reward": 0.6739583253860474, "rewards/brier_reward": 0.788122546672821, "rewards/confidence_uniqueness_reward": 0.9430952072143555, "rewards/format_reward": 0.9907986164093018, "rewards/frontier_coverage_0": 0.018168472126126288, "rewards/frontier_coverage_1": 0.018168472126126288, "rewards/frontier_coverage_10": 0.018168472126126288, "rewards/frontier_coverage_15": 0.018168472126126288, "rewards/frontier_coverage_20": 0.018168472126126288, "rewards/frontier_coverage_25": 0.018168472126126288, "rewards/frontier_coverage_5": 0.018168472126126288, "rewards/frontier_entropy_batch_reward": -0.2394638776779175, "signal/accuracy_reward/centered_abs_mean": 0.17725694477558135, "signal/accuracy_reward/group_std_mean": 0.22965039312839508, "signal/accuracy_reward/group_zero_std_frac": 0.36388889253139495, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0828640937805176, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08862847238779067, "signal/advantage_abs_mean": 0.7517815709114075, "signal/advantage_pre_scale_abs_mean": 0.09779231399297714, "signal/advantage_pre_scale_std": 0.15307309925556184, "signal/advantage_std": 0.983241617679596, "signal/brier_reward/centered_abs_mean": 0.15630776584148406, "signal/brier_reward/group_std_mean": 0.19986412227153777, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1932190716266632, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015630776807665826, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027252191677689552, "signal/confidence_uniqueness_reward/group_std_mean": 0.046789034456014636, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034193987399339675, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027252191212028265, "signal/format_reward/centered_abs_mean": 0.016525607742369174, "signal/format_reward/group_std_mean": 0.03393084555864334, "signal/format_reward/group_zero_std_frac": 0.850000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10422060191631317, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008262803871184587, "signal/frontier_coverage_0/centered_abs_mean": 0.2169239789247513, "signal/frontier_coverage_0/group_std_mean": 0.2821938157081604, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03821746855974197, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031020127702504397, "signal/frontier_coverage_1/centered_abs_mean": 0.2169239789247513, "signal/frontier_coverage_1/group_std_mean": 0.2821938157081604, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03821746855974197, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031020127702504397, "signal/frontier_coverage_10/centered_abs_mean": 0.2169239789247513, "signal/frontier_coverage_10/group_std_mean": 0.2821938157081604, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03821746855974197, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031020127702504397, "signal/frontier_coverage_15/centered_abs_mean": 0.2169239789247513, "signal/frontier_coverage_15/group_std_mean": 0.2821938157081604, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03821746855974197, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031020127702504397, "signal/frontier_coverage_20/centered_abs_mean": 0.2169239789247513, "signal/frontier_coverage_20/group_std_mean": 0.2821938157081604, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03821746855974197, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031020127702504397, "signal/frontier_coverage_25/centered_abs_mean": 0.2169239789247513, "signal/frontier_coverage_25/group_std_mean": 0.2821938157081604, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03821746855974197, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031020127702504397, "signal/frontier_coverage_5/centered_abs_mean": 0.2169239789247513, "signal/frontier_coverage_5/group_std_mean": 0.2821938157081604, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03821746855974197, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031020127702504397, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3079935610294342, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3808625817298889, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3828635513782501, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03079935573041439, "step": 135 }, { "calibration/aurc": 0.1337878460272878, "calibration/batch_distribution_entropy": 0.9491518389855494, "calibration/buffer_distribution_entropy": 0.9504134002453654, "calibration/confidence_entropy": 0.470818452840835, "calibration/coverage@0%": 0.02919666230366492, "calibration/coverage@1%": 0.02919666230366492, "calibration/coverage@10%": 0.45605468305013697, "calibration/coverage@15%": 0.7058141127727731, "calibration/coverage@20%": 0.789229159831677, "calibration/coverage@25%": 0.8711048675379001, "calibration/coverage@30%": 0.9482999031140213, "calibration/coverage@5%": 0.19594786212914483, "calibration/ece": 0.11502078156694107, "calibration/mean_confidence": 0.6127720502496946, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00581597222222221, "completions/max_length": 3342.4, "completions/max_terminated_length": 3342.4, "completions/mean_length": 629.29306640625, "completions/mean_terminated_length": 632.9800415039062, "completions/min_length": 0.0, "completions/min_terminated_length": 163.2, "epoch": 0.33599580005249935, "grad_norm": 0.004057453945279121, "learning_rate": 2.0481927710843377e-06, "loss": -0.0122, "num_tokens": 286362616.0, "reward": 0.9767401456832886, "reward_std": 0.12093752324581146, "rewards/accuracy_reward": 0.6546006917953491, "rewards/brier_reward": 0.808280074596405, "rewards/confidence_uniqueness_reward": 0.9443035125732422, "rewards/format_reward": 0.9940104126930237, "rewards/frontier_coverage_0": 0.045683811977505685, "rewards/frontier_coverage_1": 0.045683811977505685, "rewards/frontier_coverage_10": 0.045683811977505685, "rewards/frontier_coverage_15": 0.045683811977505685, "rewards/frontier_coverage_20": 0.045683811977505685, "rewards/frontier_coverage_25": 0.045683811977505685, "rewards/frontier_coverage_5": 0.045683811977505685, "rewards/frontier_entropy_batch_reward": -0.27396737039089203, "signal/accuracy_reward/centered_abs_mean": 0.15417209565639495, "signal/accuracy_reward/group_std_mean": 0.20770005285739898, "signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9896841287612915, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07708604782819747, "signal/advantage_abs_mean": 0.7525755763053894, "signal/advantage_pre_scale_abs_mean": 0.08958911299705505, "signal/advantage_pre_scale_std": 0.14011250436306, "signal/advantage_std": 0.9831971049308776, "signal/brier_reward/centered_abs_mean": 0.13868292272090912, "signal/brier_reward/group_std_mean": 0.1801248759031296, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17836227416992187, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01386829260736704, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023434021696448325, "signal/confidence_uniqueness_reward/group_std_mean": 0.04013000652194023, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030173908919095993, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023434022441506386, "signal/format_reward/centered_abs_mean": 0.011105685587972402, "signal/format_reward/group_std_mean": 0.025266989693045618, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07126235738396644, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005552842793986201, "signal/frontier_coverage_0/centered_abs_mean": 0.19150737822055816, "signal/frontier_coverage_0/group_std_mean": 0.25176058411598207, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0352290228009224, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027385556139051916, "signal/frontier_coverage_1/centered_abs_mean": 0.19150737822055816, "signal/frontier_coverage_1/group_std_mean": 0.25176058411598207, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0352290228009224, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027385556139051916, "signal/frontier_coverage_10/centered_abs_mean": 0.19150737822055816, "signal/frontier_coverage_10/group_std_mean": 0.25176058411598207, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0352290228009224, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027385556139051916, "signal/frontier_coverage_15/centered_abs_mean": 0.19150737822055816, "signal/frontier_coverage_15/group_std_mean": 0.25176058411598207, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0352290228009224, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027385556139051916, "signal/frontier_coverage_20/centered_abs_mean": 0.19150737822055816, "signal/frontier_coverage_20/group_std_mean": 0.25176058411598207, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0352290228009224, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027385556139051916, "signal/frontier_coverage_25/centered_abs_mean": 0.19150737822055816, "signal/frontier_coverage_25/group_std_mean": 0.25176058411598207, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0352290228009224, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027385556139051916, "signal/frontier_coverage_5/centered_abs_mean": 0.19150737822055816, "signal/frontier_coverage_5/group_std_mean": 0.25176058411598207, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0352290228009224, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027385556139051916, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32493494153022767, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39551191329956054, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41893631815910337, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03249349407851696, "step": 140 }, { "calibration/aurc": 0.18233118348876434, "calibration/batch_distribution_entropy": 0.9831680569385256, "calibration/buffer_distribution_entropy": 0.9598290512239208, "calibration/confidence_entropy": 0.4897688461891715, "calibration/coverage@0%": 0.04602953909832603, "calibration/coverage@1%": 0.04602953909832603, "calibration/coverage@10%": 0.37472474066657796, "calibration/coverage@15%": 0.47529519217066457, "calibration/coverage@20%": 0.6035010202835199, "calibration/coverage@25%": 0.7062519554070845, "calibration/coverage@30%": 0.8091139749667903, "calibration/coverage@5%": 0.14104869190855002, "calibration/ece": 0.16769359034482495, "calibration/mean_confidence": 0.5345419757715583, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005468750000000022, "completions/max_length": 3000.6, "completions/max_terminated_length": 3000.6, "completions/mean_length": 603.790283203125, "completions/mean_terminated_length": 607.152490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 175.6, "epoch": 0.34799565005437433, "grad_norm": 0.004024908412247896, "learning_rate": 1.8975903614457832e-06, "loss": -0.0099, "num_tokens": 296382888.0, "reward": 1.0045419931411743, "reward_std": 0.10734798014163971, "rewards/accuracy_reward": 0.7182291746139526, "rewards/brier_reward": 0.797440505027771, "rewards/confidence_uniqueness_reward": 0.9454038500785827, "rewards/format_reward": 0.9941840171813965, "rewards/frontier_coverage_0": -0.007990724965929985, "rewards/frontier_coverage_1": -0.007990724965929985, "rewards/frontier_coverage_10": -0.007990724965929985, "rewards/frontier_coverage_15": -0.007990724965929985, "rewards/frontier_coverage_20": -0.007990724965929985, "rewards/frontier_coverage_25": -0.0050966314971446994, "rewards/frontier_coverage_5": -0.007990724965929985, "rewards/frontier_entropy_batch_reward": -0.25190583765506747, "signal/accuracy_reward/centered_abs_mean": 0.12996961772441865, "signal/accuracy_reward/group_std_mean": 0.18141130805015565, "signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9297018647193909, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06498480886220932, "signal/advantage_abs_mean": 0.7460544824600219, "signal/advantage_pre_scale_abs_mean": 0.07965542376041412, "signal/advantage_pre_scale_std": 0.12871635258197783, "signal/advantage_std": 0.9830474495887757, "signal/brier_reward/centered_abs_mean": 0.1287109524011612, "signal/brier_reward/group_std_mean": 0.16658840775489808, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18521082699298858, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012871095538139343, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02220791019499302, "signal/confidence_uniqueness_reward/group_std_mean": 0.033020298555493356, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031845220178365705, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022207909962162374, "signal/format_reward/centered_abs_mean": 0.009825303871184587, "signal/format_reward/group_std_mean": 0.017753782123327254, "signal/format_reward/group_zero_std_frac": 0.9277778029441833, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06998921409249306, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004912651935592293, "signal/frontier_coverage_0/centered_abs_mean": 0.17401364147663118, "signal/frontier_coverage_0/group_std_mean": 0.23170343041419983, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035727670043706895, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024883949663490057, "signal/frontier_coverage_1/centered_abs_mean": 0.17401364147663118, "signal/frontier_coverage_1/group_std_mean": 0.23170343041419983, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035727670043706895, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024883949663490057, "signal/frontier_coverage_10/centered_abs_mean": 0.17401364147663118, "signal/frontier_coverage_10/group_std_mean": 0.23170343041419983, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.035727670043706895, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024883949663490057, "signal/frontier_coverage_15/centered_abs_mean": 0.17401364147663118, "signal/frontier_coverage_15/group_std_mean": 0.23170343041419983, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.035727670043706895, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024883949663490057, "signal/frontier_coverage_20/centered_abs_mean": 0.17401364147663118, "signal/frontier_coverage_20/group_std_mean": 0.23170343041419983, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035727670043706895, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024883949663490057, "signal/frontier_coverage_25/centered_abs_mean": 0.15727486312389374, "signal/frontier_coverage_25/group_std_mean": 0.20966436564922333, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03222865499556064, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022490305360406636, "signal/frontier_coverage_5/centered_abs_mean": 0.17401364147663118, "signal/frontier_coverage_5/group_std_mean": 0.23170343041419983, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035727670043706895, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024883949663490057, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30758561491966246, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3782775580883026, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44249006509780886, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030758562684059142, "step": 145 }, { "calibration/aurc": 0.19084036200929547, "calibration/batch_distribution_entropy": 0.9682289496498896, "calibration/buffer_distribution_entropy": 0.9670171964368471, "calibration/confidence_entropy": 0.5042296552565959, "calibration/coverage@0%": 0.06598648652575967, "calibration/coverage@1%": 0.0748870100859691, "calibration/coverage@10%": 0.36375211584191663, "calibration/coverage@15%": 0.4656610314172543, "calibration/coverage@20%": 0.5612869886858138, "calibration/coverage@25%": 0.6531385987815492, "calibration/coverage@30%": 0.7883200065274152, "calibration/coverage@5%": 0.29452257920003927, "calibration/ece": 0.18335817487450937, "calibration/mean_confidence": 0.5571029847794488, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004774305555555558, "completions/max_length": 3239.2, "completions/max_terminated_length": 3239.2, "completions/mean_length": 701.5655395507813, "completions/mean_terminated_length": 704.9570678710937, "completions/min_length": 0.0, "completions/min_terminated_length": 189.6, "epoch": 0.3599955000562493, "grad_norm": 0.0036906444001942873, "learning_rate": 1.7469879518072292e-06, "loss": -0.0084, "num_tokens": 307575259.0, "reward": 0.9870135068893433, "reward_std": 0.12090798169374466, "rewards/accuracy_reward": 0.6818576455116272, "rewards/brier_reward": 0.8064930081367493, "rewards/confidence_uniqueness_reward": 0.9434916973114014, "rewards/format_reward": 0.9942708134651184, "rewards/frontier_coverage_0": 0.02103922632522881, "rewards/frontier_coverage_1": 0.02103922632522881, "rewards/frontier_coverage_10": 0.02103922632522881, "rewards/frontier_coverage_15": 0.02103922632522881, "rewards/frontier_coverage_20": 0.021864201012067496, "rewards/frontier_coverage_25": 0.04642558991909027, "rewards/frontier_coverage_5": 0.02103922632522881, "rewards/frontier_entropy_batch_reward": -0.28530060350894926, "signal/accuracy_reward/centered_abs_mean": 0.1652289465069771, "signal/accuracy_reward/group_std_mean": 0.2149661064147949, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0628995418548584, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08261447325348854, "signal/advantage_abs_mean": 0.768857729434967, "signal/advantage_pre_scale_abs_mean": 0.09277653992176056, "signal/advantage_pre_scale_std": 0.141487255692482, "signal/advantage_std": 0.9831639409065247, "signal/brier_reward/centered_abs_mean": 0.13207932710647582, "signal/brier_reward/group_std_mean": 0.16995641589164734, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17336148023605347, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013207933306694031, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02226933278143406, "signal/confidence_uniqueness_reward/group_std_mean": 0.0336017731577158, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029386086389422417, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002226933324709535, "signal/format_reward/centered_abs_mean": 0.008897569379769266, "signal/format_reward/group_std_mean": 0.017280596494674682, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05693276599049568, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004448784689884633, "signal/frontier_coverage_0/centered_abs_mean": 0.18552227616310119, "signal/frontier_coverage_0/group_std_mean": 0.24258872568607331, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03458261713385582, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002652968605980277, "signal/frontier_coverage_1/centered_abs_mean": 0.18552227616310119, "signal/frontier_coverage_1/group_std_mean": 0.24258872568607331, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03458261713385582, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002652968605980277, "signal/frontier_coverage_10/centered_abs_mean": 0.18552227616310119, "signal/frontier_coverage_10/group_std_mean": 0.24258872568607331, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03458261713385582, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002652968605980277, "signal/frontier_coverage_15/centered_abs_mean": 0.18552227616310119, "signal/frontier_coverage_15/group_std_mean": 0.24258872568607331, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03458261713385582, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002652968605980277, "signal/frontier_coverage_20/centered_abs_mean": 0.17669001817703248, "signal/frontier_coverage_20/group_std_mean": 0.23156578838825226, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03303196430206299, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002526667295023799, "signal/frontier_coverage_25/centered_abs_mean": 0.06840592995285988, "signal/frontier_coverage_25/group_std_mean": 0.08922984004020691, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01295476108789444, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009782047942280768, "signal/frontier_coverage_5/centered_abs_mean": 0.18552227616310119, "signal/frontier_coverage_5/group_std_mean": 0.24258872568607331, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03458261713385582, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002652968605980277, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32859750390052794, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3984165847301483, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4377071440219879, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03285974971950054, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.13702523864879065, "eval_calibration/batch_distribution_entropy": 0.9284841274227499, "eval_calibration/buffer_distribution_entropy": 0.9708200938328989, "eval_calibration/confidence_entropy": 0.4971687058154246, "eval_calibration/coverage@0%": 0.26293682795698925, "eval_calibration/coverage@1%": 0.26293682795698925, "eval_calibration/coverage@10%": 0.5304099462365591, "eval_calibration/coverage@15%": 0.6609543010752689, "eval_calibration/coverage@20%": 0.7239583333333334, "eval_calibration/coverage@25%": 0.8385416666666666, "eval_calibration/coverage@30%": 0.9322916666666666, "eval_calibration/coverage@5%": 0.28897849462365593, "eval_calibration/ece": 0.1888974403920867, "eval_calibration/mean_confidence": 0.5458924104876176, "eval_completions/clipped_ratio": 0.0026041666666666665, "eval_completions/max_length": 2327.3333333333335, "eval_completions/max_terminated_length": 2327.3333333333335, "eval_completions/mean_length": 692.9316202799479, "eval_completions/mean_terminated_length": 694.7150370279948, "eval_completions/min_length": 158.5, "eval_completions/min_terminated_length": 230.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 307575259.0, "eval_reward": 0.9107913474241892, "eval_reward_std": 0.2171098291873932, "eval_rewards/accuracy_reward": 0.6788194378217062, "eval_rewards/brier_reward": 0.8023928701877594, "eval_rewards/confidence_uniqueness_reward": 0.8938767115275065, "eval_rewards/format_reward": 0.9973958333333334, "eval_rewards/frontier_coverage_0": 0.02134424013396104, "eval_rewards/frontier_coverage_1": 0.02134424013396104, "eval_rewards/frontier_coverage_10": 0.02134424013396104, "eval_rewards/frontier_coverage_15": 0.02134424013396104, "eval_rewards/frontier_coverage_20": 0.02452502477293213, "eval_rewards/frontier_coverage_25": 0.0642988532781601, "eval_rewards/frontier_coverage_5": 0.02134424013396104, "eval_rewards/frontier_entropy_batch_reward": -0.9973958333333334, "eval_runtime": 139.2104, "eval_samples_per_second": 7.183, "eval_signal/accuracy_reward/centered_abs_mean": 0.4271918435891469, "eval_signal/accuracy_reward/group_std_mean": 0.4689544787009557, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9881373941898346, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21359592179457346, "eval_signal/advantage_abs_mean": 0.8833253582318624, "eval_signal/advantage_pre_scale_abs_mean": 0.19192364563544592, "eval_signal/advantage_pre_scale_std": 0.21469872941573462, "eval_signal/advantage_std": 0.986367384592692, "eval_signal/brier_reward/centered_abs_mean": 0.18057803561290106, "eval_signal/brier_reward/group_std_mean": 0.23535025119781494, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08358132963379224, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018057803623378277, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0436480101197958, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05746622569859028, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020154597237706184, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0043648009886965156, "eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/format_reward/group_std_mean": 0.014731391333043575, "eval_signal/format_reward/group_zero_std_frac": 0.9166666766007742, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011093226571877798, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.29528985420862836, "eval_signal/frontier_coverage_0/group_std_mean": 0.4035186717907588, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019564516842365265, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004222644803424676, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.29528985420862836, "eval_signal/frontier_coverage_1/group_std_mean": 0.4035186717907588, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019564516842365265, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004222644803424676, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.29528985420862836, "eval_signal/frontier_coverage_10/group_std_mean": 0.4035186717907588, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019564516842365265, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004222644803424676, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.29528985420862836, "eval_signal/frontier_coverage_15/group_std_mean": 0.4035186717907588, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019564516842365265, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004222644803424676, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.23646595080693564, "eval_signal/frontier_coverage_20/group_std_mean": 0.32997279862562817, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015677123485753935, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003381463116966188, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.09063521524270375, "eval_signal/frontier_coverage_25/group_std_mean": 0.11626108984152476, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006016027880832553, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001296083559282124, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.29528985420862836, "eval_signal/frontier_coverage_5/group_std_mean": 0.4035186717907588, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019564516842365265, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004222644803424676, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666766007742, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0022186453764637313, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045573343522847, "eval_steps_per_second": 0.043, "step": 150 }, { "calibration/aurc": 0.14161491473574045, "calibration/batch_distribution_entropy": 0.9785979939228288, "calibration/buffer_distribution_entropy": 0.9725325260012785, "calibration/confidence_entropy": 0.5026786576895561, "calibration/coverage@0%": 0.030372153493650255, "calibration/coverage@1%": 0.030372153493650255, "calibration/coverage@10%": 0.48445113108853616, "calibration/coverage@15%": 0.6141158725686856, "calibration/coverage@20%": 0.7234010943737882, "calibration/coverage@25%": 0.850517394479151, "calibration/coverage@30%": 0.8960138340191106, "calibration/coverage@5%": 0.2579811082970747, "calibration/ece": 0.1825885505288956, "calibration/mean_confidence": 0.5590493941529713, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3360.2, "completions/max_terminated_length": 3360.2, "completions/mean_length": 680.30234375, "completions/mean_terminated_length": 683.0581176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 207.4, "epoch": 0.3719953500581243, "grad_norm": 0.003881107782945037, "learning_rate": 1.5963855421686747e-06, "loss": -0.0034, "num_tokens": 318520054.0, "reward": 1.0143356561660766, "reward_std": 0.11488137692213059, "rewards/accuracy_reward": 0.7287326455116272, "rewards/brier_reward": 0.8206124186515809, "rewards/confidence_uniqueness_reward": 0.9463862299919128, "rewards/format_reward": 0.9959201574325561, "rewards/frontier_coverage_0": 0.008624611730920152, "rewards/frontier_coverage_1": 0.008624611730920152, "rewards/frontier_coverage_10": 0.008623575296951458, "rewards/frontier_coverage_15": 0.008593602268956602, "rewards/frontier_coverage_20": 0.019597085565328597, "rewards/frontier_coverage_25": 0.09769158065319061, "rewards/frontier_coverage_5": 0.008624611730920152, "rewards/frontier_entropy_batch_reward": -0.26984030604362486, "signal/accuracy_reward/centered_abs_mean": 0.15015733242034912, "signal/accuracy_reward/group_std_mean": 0.20241186618804932, "signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0236715793609619, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07507866621017456, "signal/advantage_abs_mean": 0.7554642915725708, "signal/advantage_pre_scale_abs_mean": 0.0852625235915184, "signal/advantage_pre_scale_std": 0.13472781628370284, "signal/advantage_std": 0.9831080079078675, "signal/brier_reward/centered_abs_mean": 0.1314813494682312, "signal/brier_reward/group_std_mean": 0.16985029578208924, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18111539185047149, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013148135691881179, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020419245585799217, "signal/confidence_uniqueness_reward/group_std_mean": 0.03257020190358162, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02790914885699749, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020419245585799215, "signal/format_reward/centered_abs_mean": 0.007546658022329211, "signal/format_reward/group_std_mean": 0.016791296564042567, "signal/format_reward/group_zero_std_frac": 0.9222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05060187578201294, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0037733290111646054, "signal/frontier_coverage_0/centered_abs_mean": 0.19345370233058928, "signal/frontier_coverage_0/group_std_mean": 0.2506356716156006, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03803465738892555, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027663879096508025, "signal/frontier_coverage_1/centered_abs_mean": 0.19345370233058928, "signal/frontier_coverage_1/group_std_mean": 0.2506356716156006, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03803465738892555, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027663879096508025, "signal/frontier_coverage_10/centered_abs_mean": 0.19344922304153442, "signal/frontier_coverage_10/group_std_mean": 0.2506299793720245, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03803384155035019, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002766323834657669, "signal/frontier_coverage_15/centered_abs_mean": 0.19292726516723632, "signal/frontier_coverage_15/group_std_mean": 0.2499801516532898, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03793646469712257, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027588598895817995, "signal/frontier_coverage_20/centered_abs_mean": 0.1353215456008911, "signal/frontier_coverage_20/group_std_mean": 0.17713548839092255, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02666233666241169, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001935098133981228, "signal/frontier_coverage_25/centered_abs_mean": 0.08023149967193603, "signal/frontier_coverage_25/group_std_mean": 0.10117035806179046, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015790591202676296, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001147310435771942, "signal/frontier_coverage_5/centered_abs_mean": 0.19345370233058928, "signal/frontier_coverage_5/group_std_mean": 0.2506356716156006, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03803465738892555, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027663879096508025, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3274266362190247, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39751541018486025, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4508472442626953, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03274266496300697, "step": 155 }, { "calibration/aurc": 0.09399491959647298, "calibration/batch_distribution_entropy": 0.9412546711952994, "calibration/buffer_distribution_entropy": 0.9754604476125126, "calibration/confidence_entropy": 0.4770566756690894, "calibration/coverage@0%": 0.09393830903780045, "calibration/coverage@1%": 0.09393830903780045, "calibration/coverage@10%": 0.7389721201296986, "calibration/coverage@15%": 0.843777086332959, "calibration/coverage@20%": 0.880931680869679, "calibration/coverage@25%": 0.9050462602561364, "calibration/coverage@30%": 0.9182058047493402, "calibration/coverage@5%": 0.5374245034399536, "calibration/ece": 0.16942426673168062, "calibration/mean_confidence": 0.6137393061839143, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006770833333333326, "completions/max_length": 3504.0, "completions/max_terminated_length": 3504.0, "completions/mean_length": 693.0671875, "completions/mean_terminated_length": 697.8524291992187, "completions/min_length": 0.0, "completions/min_terminated_length": 187.6, "epoch": 0.38399520005999926, "grad_norm": 0.003740179119631648, "learning_rate": 1.4457831325301204e-06, "loss": -0.0148, "num_tokens": 329591484.0, "reward": 0.9867475509643555, "reward_std": 0.1178449347615242, "rewards/accuracy_reward": 0.6817708253860474, "rewards/brier_reward": 0.8016348242759704, "rewards/confidence_uniqueness_reward": 0.9426298260688781, "rewards/format_reward": 0.9927083253860474, "rewards/frontier_coverage_0": 0.02299555651843548, "rewards/frontier_coverage_1": 0.02299555651843548, "rewards/frontier_coverage_10": 0.02299380600452423, "rewards/frontier_coverage_15": 0.022937557473778725, "rewards/frontier_coverage_20": 0.027411183714866637, "rewards/frontier_coverage_25": 0.10199409276247025, "rewards/frontier_coverage_5": 0.02299555651843548, "rewards/frontier_entropy_batch_reward": -0.28412319123744967, "signal/accuracy_reward/centered_abs_mean": 0.14393446147441863, "signal/accuracy_reward/group_std_mean": 0.1882144957780838, "signal/accuracy_reward/group_zero_std_frac": 0.47222222089767457, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0052073359489442, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07196723073720931, "signal/advantage_abs_mean": 0.7603908181190491, "signal/advantage_pre_scale_abs_mean": 0.08789721131324768, "signal/advantage_pre_scale_std": 0.14016545712947845, "signal/advantage_std": 0.9830867886543274, "signal/brier_reward/centered_abs_mean": 0.13717943131923677, "signal/brier_reward/group_std_mean": 0.17664145231246947, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19173648059368134, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013717942871153355, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025140639021992685, "signal/confidence_uniqueness_reward/group_std_mean": 0.041734833270311356, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034909750893712045, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025140638928860424, "signal/format_reward/centered_abs_mean": 0.012847222201526166, "signal/format_reward/group_std_mean": 0.026805402338504793, "signal/format_reward/group_zero_std_frac": 0.8805555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08747010976076126, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006423611100763083, "signal/frontier_coverage_0/centered_abs_mean": 0.1893948495388031, "signal/frontier_coverage_0/group_std_mean": 0.24652081727981567, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037821638584136966, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002708346350118518, "signal/frontier_coverage_1/centered_abs_mean": 0.1893948495388031, "signal/frontier_coverage_1/group_std_mean": 0.24652081727981567, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037821638584136966, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002708346350118518, "signal/frontier_coverage_10/centered_abs_mean": 0.18938452005386353, "signal/frontier_coverage_10/group_std_mean": 0.2465077221393585, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.037819582223892215, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002708198828622699, "signal/frontier_coverage_15/centered_abs_mean": 0.18859367370605468, "signal/frontier_coverage_15/group_std_mean": 0.24550087451934816, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03766307979822159, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026968895457684995, "signal/frontier_coverage_20/centered_abs_mean": 0.1072016030550003, "signal/frontier_coverage_20/group_std_mean": 0.139630264043808, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02150789238512516, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015329829417169093, "signal/frontier_coverage_25/centered_abs_mean": 0.08940613269805908, "signal/frontier_coverage_25/group_std_mean": 0.11217249184846878, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017939681187272072, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012785077095031738, "signal/frontier_coverage_5/centered_abs_mean": 0.1893948495388031, "signal/frontier_coverage_5/group_std_mean": 0.24652081727981567, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037821638584136966, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002708346350118518, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32551802396774293, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39456554055213927, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45712563395500183, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03255180567502976, "step": 160 }, { "calibration/aurc": 0.12667895964624948, "calibration/batch_distribution_entropy": 0.9667500631719177, "calibration/buffer_distribution_entropy": 0.9768246962707771, "calibration/confidence_entropy": 0.4787488938658747, "calibration/coverage@0%": 0.08587612683228549, "calibration/coverage@1%": 0.08587612683228549, "calibration/coverage@10%": 0.5596784657665852, "calibration/coverage@15%": 0.658714518716747, "calibration/coverage@20%": 0.7396279283724964, "calibration/coverage@25%": 0.8107754068022667, "calibration/coverage@30%": 0.8813963204972873, "calibration/coverage@5%": 0.3803046427900564, "calibration/ece": 0.20473769314593335, "calibration/mean_confidence": 0.5073165586899123, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009895833333333348, "completions/max_length": 3408.6, "completions/max_terminated_length": 3408.6, "completions/mean_length": 766.8527099609375, "completions/mean_terminated_length": 774.5172241210937, "completions/min_length": 0.0, "completions/min_terminated_length": 247.6, "epoch": 0.39599505006187424, "grad_norm": 0.0031843557953834534, "learning_rate": 1.2951807228915664e-06, "loss": -0.0273, "num_tokens": 341564699.0, "reward": 0.9761411070823669, "reward_std": 0.11881034970283508, "rewards/accuracy_reward": 0.6574652791023254, "rewards/brier_reward": 0.7964750170707703, "rewards/confidence_uniqueness_reward": 0.9407760500907898, "rewards/format_reward": 0.9895833253860473, "rewards/frontier_coverage_0": 0.04257221892476082, "rewards/frontier_coverage_1": 0.04257221892476082, "rewards/frontier_coverage_10": 0.04257510676980018, "rewards/frontier_coverage_15": 0.04277926944196224, "rewards/frontier_coverage_20": 0.046192364767193794, "rewards/frontier_coverage_25": 0.10422454476356506, "rewards/frontier_coverage_5": 0.04257221892476082, "rewards/frontier_entropy_batch_reward": -0.26306197941303255, "signal/accuracy_reward/centered_abs_mean": 0.1366536423563957, "signal/accuracy_reward/group_std_mean": 0.18269501626491547, "signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9548314452171326, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06832682117819786, "signal/advantage_abs_mean": 0.7397696733474731, "signal/advantage_pre_scale_abs_mean": 0.08615766167640686, "signal/advantage_pre_scale_std": 0.1432511866092682, "signal/advantage_std": 0.9830728769302368, "signal/brier_reward/centered_abs_mean": 0.13853301703929902, "signal/brier_reward/group_std_mean": 0.17815548181533813, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1958606421947479, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013853302784264087, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02927153408527374, "signal/confidence_uniqueness_reward/group_std_mean": 0.05202648937702179, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04150531962513924, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002927153604105115, "signal/format_reward/centered_abs_mean": 0.01867404468357563, "signal/format_reward/group_std_mean": 0.03949138410389423, "signal/format_reward/group_zero_std_frac": 0.8222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1326592281460762, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009337022341787815, "signal/frontier_coverage_0/centered_abs_mean": 0.20547735095024108, "signal/frontier_coverage_0/group_std_mean": 0.2617917537689209, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041419435292482376, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002938326168805361, "signal/frontier_coverage_1/centered_abs_mean": 0.20547735095024108, "signal/frontier_coverage_1/group_std_mean": 0.2617917537689209, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041419435292482376, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002938326168805361, "signal/frontier_coverage_10/centered_abs_mean": 0.20546633899211883, "signal/frontier_coverage_10/group_std_mean": 0.2617780089378357, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04141724780201912, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029381686355918644, "signal/frontier_coverage_15/centered_abs_mean": 0.2041507601737976, "signal/frontier_coverage_15/group_std_mean": 0.2601293295621872, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04115338325500488, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029193558264523746, "signal/frontier_coverage_20/centered_abs_mean": 0.09043723046779632, "signal/frontier_coverage_20/group_std_mean": 0.11598165482282638, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018281865678727627, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012932523852214218, "signal/frontier_coverage_25/centered_abs_mean": 0.0844599574804306, "signal/frontier_coverage_25/group_std_mean": 0.10744838416576385, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017018306627869607, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012077773921191693, "signal/frontier_coverage_5/centered_abs_mean": 0.20547735095024108, "signal/frontier_coverage_5/group_std_mean": 0.2617917537689209, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041419435292482376, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002938326168805361, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32049464583396914, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39367471933364867, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4526883363723755, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03204946555197239, "step": 165 }, { "calibration/aurc": 0.12042923465880181, "calibration/batch_distribution_entropy": 0.9231336191494597, "calibration/buffer_distribution_entropy": 0.9772784581546675, "calibration/confidence_entropy": 0.4722359481486856, "calibration/coverage@0%": 0.02619224996737286, "calibration/coverage@1%": 0.02619224996737286, "calibration/coverage@10%": 0.5754376567915199, "calibration/coverage@15%": 0.6780955222879599, "calibration/coverage@20%": 0.8664804654079841, "calibration/coverage@25%": 0.9255724249938371, "calibration/coverage@30%": 0.9763779527559056, "calibration/coverage@5%": 0.21658759117472193, "calibration/ece": 0.11244536955518544, "calibration/mean_confidence": 0.6432379778464519, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010677083333333327, "completions/max_length": 3379.8, "completions/max_terminated_length": 3379.8, "completions/mean_length": 764.9436767578125, "completions/mean_terminated_length": 773.263427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 232.4, "epoch": 0.4079949000637492, "grad_norm": 0.0034331590868532658, "learning_rate": 1.1445783132530121e-06, "loss": -0.0318, "num_tokens": 353466034.0, "reward": 1.001962959766388, "reward_std": 0.12031054049730301, "rewards/accuracy_reward": 0.7228298544883728, "rewards/brier_reward": 0.8268496513366699, "rewards/confidence_uniqueness_reward": 0.9339795589447022, "rewards/format_reward": 0.9889756798744201, "rewards/frontier_coverage_0": 0.018969600554555655, "rewards/frontier_coverage_1": 0.018969600554555655, "rewards/frontier_coverage_10": 0.01897885270882398, "rewards/frontier_coverage_15": 0.02006477633258328, "rewards/frontier_coverage_20": 0.05282620638608933, "rewards/frontier_coverage_25": 0.15781393945217131, "rewards/frontier_coverage_5": 0.018969600554555655, "rewards/frontier_entropy_batch_reward": -0.34407015442848204, "signal/accuracy_reward/centered_abs_mean": 0.13177625983953475, "signal/accuracy_reward/group_std_mean": 0.1782878965139389, "signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.957055127620697, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06588812991976738, "signal/advantage_abs_mean": 0.7518744587898254, "signal/advantage_pre_scale_abs_mean": 0.08772747218608856, "signal/advantage_pre_scale_std": 0.14746004790067674, "signal/advantage_std": 0.9830225229263305, "signal/brier_reward/centered_abs_mean": 0.12697158604860306, "signal/brier_reward/group_std_mean": 0.16580133736133576, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18501022160053254, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012697158567607402, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03250643089413643, "signal/confidence_uniqueness_reward/group_std_mean": 0.054881346970796586, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0470227912068367, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003250643378123641, "signal/format_reward/centered_abs_mean": 0.01877712644636631, "signal/format_reward/group_std_mean": 0.03860697820782662, "signal/format_reward/group_zero_std_frac": 0.8305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.13337477520108224, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009388563223183155, "signal/frontier_coverage_0/centered_abs_mean": 0.1576144963502884, "signal/frontier_coverage_0/group_std_mean": 0.21049812138080598, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03282146006822586, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022538872435688972, "signal/frontier_coverage_1/centered_abs_mean": 0.1576144963502884, "signal/frontier_coverage_1/group_std_mean": 0.21049812138080598, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03282146006822586, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022538872435688972, "signal/frontier_coverage_10/centered_abs_mean": 0.15755284130573272, "signal/frontier_coverage_10/group_std_mean": 0.2104198604822159, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03280867114663124, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022530056070536376, "signal/frontier_coverage_15/centered_abs_mean": 0.1547604590654373, "signal/frontier_coverage_15/group_std_mean": 0.20678509175777435, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03222393654286861, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002213074592873454, "signal/frontier_coverage_20/centered_abs_mean": 0.06509168595075607, "signal/frontier_coverage_20/group_std_mean": 0.08332156985998154, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01360565610229969, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009308110922574997, "signal/frontier_coverage_25/centered_abs_mean": 0.10484070777893066, "signal/frontier_coverage_25/group_std_mean": 0.13356612026691436, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02198589891195297, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014992221491411327, "signal/frontier_coverage_5/centered_abs_mean": 0.1576144963502884, "signal/frontier_coverage_5/group_std_mean": 0.21049812138080598, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03282146006822586, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022538872435688972, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34670414328575133, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41445213556289673, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5086399018764496, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03467041626572609, "step": 170 }, { "calibration/aurc": 0.09662731950335442, "calibration/batch_distribution_entropy": 0.967024441243046, "calibration/buffer_distribution_entropy": 0.9772981503148188, "calibration/confidence_entropy": 0.5046960184786891, "calibration/coverage@0%": 0.08649086696074063, "calibration/coverage@1%": 0.1979207803453709, "calibration/coverage@10%": 0.6744557656695441, "calibration/coverage@15%": 0.7621469822036061, "calibration/coverage@20%": 0.850881153068473, "calibration/coverage@25%": 0.9165939939987974, "calibration/coverage@30%": 0.9591402590245247, "calibration/coverage@5%": 0.3844438926821537, "calibration/ece": 0.16998348481003883, "calibration/mean_confidence": 0.5691553376825024, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888907, "completions/max_length": 3807.2, "completions/max_terminated_length": 3807.2, "completions/mean_length": 819.2960205078125, "completions/mean_terminated_length": 826.9532104492188, "completions/min_length": 0.0, "completions/min_terminated_length": 258.4, "epoch": 0.4199947500656242, "grad_norm": 0.0033503889571875334, "learning_rate": 9.93975903614458e-07, "loss": -0.0224, "num_tokens": 366012292.0, "reward": 1.0019341111183167, "reward_std": 0.12585299015045165, "rewards/accuracy_reward": 0.7077257037162781, "rewards/brier_reward": 0.8151641726493836, "rewards/confidence_uniqueness_reward": 0.9411271095275879, "rewards/format_reward": 0.9901041746139526, "rewards/frontier_coverage_0": 0.014120917581021786, "rewards/frontier_coverage_1": 0.014120917581021786, "rewards/frontier_coverage_10": 0.014142588526010514, "rewards/frontier_coverage_15": 0.014934336580336095, "rewards/frontier_coverage_20": 0.053513363003730774, "rewards/frontier_coverage_25": 0.1392007663846016, "rewards/frontier_coverage_5": 0.014121649414300918, "rewards/frontier_entropy_batch_reward": -0.26387418806552887, "signal/accuracy_reward/centered_abs_mean": 0.15167643427848815, "signal/accuracy_reward/group_std_mean": 0.2053221881389618, "signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.008397400379181, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07583821713924407, "signal/advantage_abs_mean": 0.7449156880378723, "signal/advantage_pre_scale_abs_mean": 0.09188491851091385, "signal/advantage_pre_scale_std": 0.1496666193008423, "signal/advantage_std": 0.9831571817398072, "signal/brier_reward/centered_abs_mean": 0.12859825491905214, "signal/brier_reward/group_std_mean": 0.16735298037528992, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1708065688610077, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012859826162457465, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02758704237639904, "signal/confidence_uniqueness_reward/group_std_mean": 0.045367203652858734, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03653429411351681, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027587041724473236, "signal/format_reward/centered_abs_mean": 0.016514757089316844, "signal/format_reward/group_std_mean": 0.032019348442554475, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10936851501464843, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008257378544658422, "signal/frontier_coverage_0/centered_abs_mean": 0.18040508925914764, "signal/frontier_coverage_0/group_std_mean": 0.23544572591781615, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03435261063277721, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025797927286475898, "signal/frontier_coverage_1/centered_abs_mean": 0.18040508925914764, "signal/frontier_coverage_1/group_std_mean": 0.23544572591781615, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03435261063277721, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025797927286475898, "signal/frontier_coverage_10/centered_abs_mean": 0.18032192587852477, "signal/frontier_coverage_10/group_std_mean": 0.235341677069664, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03433669619262218, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025786036625504495, "signal/frontier_coverage_15/centered_abs_mean": 0.1752968579530716, "signal/frontier_coverage_15/group_std_mean": 0.22891083657741546, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.033378247171640396, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002506745047867298, "signal/frontier_coverage_20/centered_abs_mean": 0.06402314454317093, "signal/frontier_coverage_20/group_std_mean": 0.08175744861364365, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012168211303651333, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009155309875495732, "signal/frontier_coverage_25/centered_abs_mean": 0.09939492493867874, "signal/frontier_coverage_25/group_std_mean": 0.12737924307584764, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018856792896986007, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001421347470022738, "signal/frontier_coverage_5/centered_abs_mean": 0.18040342926979064, "signal/frontier_coverage_5/group_std_mean": 0.23544372022151946, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034352288022637366, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002579768933355808, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3185386657714844, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3876194655895233, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42217653393745425, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0318538673222065, "step": 175 }, { "calibration/aurc": 0.08526290527356786, "calibration/batch_distribution_entropy": 0.9492263091629465, "calibration/buffer_distribution_entropy": 0.9778171307399122, "calibration/confidence_entropy": 0.49997622614018694, "calibration/coverage@0%": 0.05113034229386461, "calibration/coverage@1%": 0.09365002733323467, "calibration/coverage@10%": 0.7473668466249658, "calibration/coverage@15%": 0.8666818025240637, "calibration/coverage@20%": 0.9463922907613297, "calibration/coverage@25%": 0.9740053050397878, "calibration/coverage@30%": 0.9946949602122016, "calibration/coverage@5%": 0.3784139398978424, "calibration/ece": 0.166516695461729, "calibration/mean_confidence": 0.6187747295165235, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009982638888888862, "completions/max_length": 3491.4, "completions/max_terminated_length": 3491.4, "completions/mean_length": 794.2389770507813, "completions/mean_terminated_length": 802.2543701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 254.2, "epoch": 0.4319946000674992, "grad_norm": 0.003332644235342741, "learning_rate": 8.433734939759036e-07, "loss": -0.0247, "num_tokens": 378261893.0, "reward": 1.0071517705917359, "reward_std": 0.12117233127355576, "rewards/accuracy_reward": 0.725781238079071, "rewards/brier_reward": 0.8080682635307312, "rewards/confidence_uniqueness_reward": 0.938874113559723, "rewards/format_reward": 0.9896701335906982, "rewards/frontier_coverage_0": -0.003929438255727291, "rewards/frontier_coverage_1": -0.003929438255727291, "rewards/frontier_coverage_10": -0.0038942765444517136, "rewards/frontier_coverage_15": -0.004395973100326955, "rewards/frontier_coverage_20": 0.05808563455939293, "rewards/frontier_coverage_25": 0.14676995277404786, "rewards/frontier_coverage_5": -0.003927422594279051, "rewards/frontier_entropy_batch_reward": -0.27910477519035337, "signal/accuracy_reward/centered_abs_mean": 0.1342068150639534, "signal/accuracy_reward/group_std_mean": 0.17787945568561553, "signal/accuracy_reward/group_zero_std_frac": 0.4888888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9742860913276672, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0671034075319767, "signal/advantage_abs_mean": 0.7551454067230224, "signal/advantage_pre_scale_abs_mean": 0.08928989768028259, "signal/advantage_pre_scale_std": 0.14911575615406036, "signal/advantage_std": 0.9830282688140869, "signal/brier_reward/centered_abs_mean": 0.1267807200551033, "signal/brier_reward/group_std_mean": 0.16448463797569274, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18453309237957, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012678072415292263, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029533155634999274, "signal/confidence_uniqueness_reward/group_std_mean": 0.050531229376792906, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04347648099064827, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029533156659454106, "signal/format_reward/centered_abs_mean": 0.018115234375, "signal/format_reward/group_std_mean": 0.036967866495251654, "signal/format_reward/group_zero_std_frac": 0.8361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.13411470055580138, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0090576171875, "signal/frontier_coverage_0/centered_abs_mean": 0.1632930189371109, "signal/frontier_coverage_0/group_std_mean": 0.21255632638931274, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03398923799395561, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023350901901721954, "signal/frontier_coverage_1/centered_abs_mean": 0.1632930189371109, "signal/frontier_coverage_1/group_std_mean": 0.21255632638931274, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03398923799395561, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023350901901721954, "signal/frontier_coverage_10/centered_abs_mean": 0.1632182240486145, "signal/frontier_coverage_10/group_std_mean": 0.21246310472488403, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03397372327744961, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023340205661952496, "signal/frontier_coverage_15/centered_abs_mean": 0.15129518806934356, "signal/frontier_coverage_15/group_std_mean": 0.1972308337688446, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03146095797419548, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021635211771354078, "signal/frontier_coverage_20/centered_abs_mean": 0.0666133850812912, "signal/frontier_coverage_20/group_std_mean": 0.08397864252328872, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01388755403459072, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009525713743641972, "signal/frontier_coverage_25/centered_abs_mean": 0.10736925154924393, "signal/frontier_coverage_25/group_std_mean": 0.13568062484264373, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022355619445443155, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015353802824392914, "signal/frontier_coverage_5/centered_abs_mean": 0.16328986287117003, "signal/frontier_coverage_5/group_std_mean": 0.21255233883857727, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033988584950566295, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335045067593455, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.321364963054657, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38714643120765685, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4681834578514099, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03213649578392506, "step": 180 }, { "calibration/aurc": 0.147813420478697, "calibration/batch_distribution_entropy": 0.9615515889414834, "calibration/buffer_distribution_entropy": 0.9768709358020142, "calibration/confidence_entropy": 0.48301767115068583, "calibration/coverage@0%": 0.021637299525751035, "calibration/coverage@1%": 0.021637299525751035, "calibration/coverage@10%": 0.29680602288229385, "calibration/coverage@15%": 0.7420242355127942, "calibration/coverage@20%": 0.8569570879698016, "calibration/coverage@25%": 0.938528549490437, "calibration/coverage@30%": 0.9830238726790451, "calibration/coverage@5%": 0.04941376570265051, "calibration/ece": 0.18628405567119197, "calibration/mean_confidence": 0.5851708761244938, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010503472222222232, "completions/max_length": 3336.4, "completions/max_terminated_length": 3336.4, "completions/mean_length": 798.7246704101562, "completions/mean_terminated_length": 807.3341674804688, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.44399445006937416, "grad_norm": 0.0037687718868255615, "learning_rate": 6.927710843373495e-07, "loss": -0.026, "num_tokens": 390553249.0, "reward": 0.9922348737716675, "reward_std": 0.1255135342478752, "rewards/accuracy_reward": 0.6885416626930236, "rewards/brier_reward": 0.8148928165435791, "rewards/confidence_uniqueness_reward": 0.939547860622406, "rewards/format_reward": 0.9893229126930236, "rewards/frontier_coverage_0": 0.02959106657654047, "rewards/frontier_coverage_1": 0.02959106657654047, "rewards/frontier_coverage_10": 0.029620955046266318, "rewards/frontier_coverage_15": 0.030684778385329993, "rewards/frontier_coverage_20": 0.07125861793756486, "rewards/frontier_coverage_25": 0.15161574482917786, "rewards/frontier_coverage_5": 0.029591639526188374, "rewards/frontier_entropy_batch_reward": -0.2746046096086502, "signal/accuracy_reward/centered_abs_mean": 0.15325520634651185, "signal/accuracy_reward/group_std_mean": 0.20086986124515532, "signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.050264871120453, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07662760317325593, "signal/advantage_abs_mean": 0.7525103449821472, "signal/advantage_pre_scale_abs_mean": 0.09348735362291336, "signal/advantage_pre_scale_std": 0.15054976642131807, "signal/advantage_std": 0.9831097364425659, "signal/brier_reward/centered_abs_mean": 0.13104279041290284, "signal/brier_reward/group_std_mean": 0.16971164345741271, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18007910549640654, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013104279339313508, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02803487591445446, "signal/confidence_uniqueness_reward/group_std_mean": 0.04839293137192726, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.038316420093178746, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028034877497702835, "signal/format_reward/centered_abs_mean": 0.01681315116584301, "signal/format_reward/group_std_mean": 0.03504555374383926, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1139563001692295, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008406575582921506, "signal/frontier_coverage_0/centered_abs_mean": 0.1829205185174942, "signal/frontier_coverage_0/group_std_mean": 0.23725357055664062, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035879862308502194, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026157634798437356, "signal/frontier_coverage_1/centered_abs_mean": 0.1829205185174942, "signal/frontier_coverage_1/group_std_mean": 0.23725357055664062, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035879862308502194, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026157634798437356, "signal/frontier_coverage_10/centered_abs_mean": 0.18279743790626526, "signal/frontier_coverage_10/group_std_mean": 0.23710041046142577, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03585578799247742, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026140033267438413, "signal/frontier_coverage_15/centered_abs_mean": 0.16599198877811433, "signal/frontier_coverage_15/group_std_mean": 0.2160373091697693, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03256378434598446, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023736854549497367, "signal/frontier_coverage_20/centered_abs_mean": 0.07057978808879853, "signal/frontier_coverage_20/group_std_mean": 0.08835373222827911, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013883821666240692, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010092909797094762, "signal/frontier_coverage_25/centered_abs_mean": 0.10900045037269593, "signal/frontier_coverage_25/group_std_mean": 0.13839271068572997, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02145172506570816, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015587064437568188, "signal/frontier_coverage_5/centered_abs_mean": 0.18291856944561005, "signal/frontier_coverage_5/group_std_mean": 0.23725113570690154, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03587948232889175, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002615735540166497, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3228137791156769, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39099804162979124, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44452112913131714, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032281379029154775, "step": 185 }, { "calibration/aurc": 0.10794396304820482, "calibration/batch_distribution_entropy": 0.9318776316967142, "calibration/buffer_distribution_entropy": 0.9766401009144714, "calibration/confidence_entropy": 0.48874635286794427, "calibration/coverage@0%": 0.02941019738198198, "calibration/coverage@1%": 0.02941019738198198, "calibration/coverage@10%": 0.566576001128133, "calibration/coverage@15%": 0.7814061012715239, "calibration/coverage@20%": 0.9241913307868799, "calibration/coverage@25%": 0.9963446475195823, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.39669069301030124, "calibration/ece": 0.17917218190945836, "calibration/mean_confidence": 0.6358024523294838, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006770833333333326, "completions/max_length": 3528.6, "completions/max_terminated_length": 3528.6, "completions/mean_length": 782.6207641601562, "completions/mean_terminated_length": 787.9698608398437, "completions/min_length": 0.0, "completions/min_terminated_length": 245.4, "epoch": 0.45599430007124914, "grad_norm": 0.004032325465232134, "learning_rate": 5.421686746987952e-07, "loss": -0.0193, "num_tokens": 402651984.0, "reward": 1.0132048964500426, "reward_std": 0.11641700565814972, "rewards/accuracy_reward": 0.7356770753860473, "rewards/brier_reward": 0.8206545829772949, "rewards/confidence_uniqueness_reward": 0.9407994031906128, "rewards/format_reward": 0.9932291626930236, "rewards/frontier_coverage_0": 0.005415836116299033, "rewards/frontier_coverage_1": 0.005415836116299033, "rewards/frontier_coverage_10": 0.005474161216989159, "rewards/frontier_coverage_15": 0.009392570797353983, "rewards/frontier_coverage_20": 0.0829792320728302, "rewards/frontier_coverage_25": 0.17440233528614044, "rewards/frontier_coverage_5": 0.005415877094492316, "rewards/frontier_entropy_batch_reward": -0.31519128680229186, "signal/accuracy_reward/centered_abs_mean": 0.14232313483953477, "signal/accuracy_reward/group_std_mean": 0.18668197989463806, "signal/accuracy_reward/group_zero_std_frac": 0.4694444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0415077209472656, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07116156741976738, "signal/advantage_abs_mean": 0.7595191597938538, "signal/advantage_pre_scale_abs_mean": 0.08674072474241257, "signal/advantage_pre_scale_std": 0.14214332550764083, "signal/advantage_std": 0.9830122709274292, "signal/brier_reward/centered_abs_mean": 0.12559207975864412, "signal/brier_reward/group_std_mean": 0.16448963582515716, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18423607349395751, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01255920883268118, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025163330510258673, "signal/confidence_uniqueness_reward/group_std_mean": 0.0417561799287796, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0369965672492981, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002516333060339093, "signal/format_reward/centered_abs_mean": 0.0123046875, "signal/format_reward/group_std_mean": 0.0261313796043396, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08963212668895722, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00615234375, "signal/frontier_coverage_0/centered_abs_mean": 0.17310574054718017, "signal/frontier_coverage_0/group_std_mean": 0.22561688125133514, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036221811175346376, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024754120968282223, "signal/frontier_coverage_1/centered_abs_mean": 0.17310574054718017, "signal/frontier_coverage_1/group_std_mean": 0.22561688125133514, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036221811175346376, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024754120968282223, "signal/frontier_coverage_10/centered_abs_mean": 0.17299672365188598, "signal/frontier_coverage_10/group_std_mean": 0.22547802329063416, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.036198879778385165, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024738531094044445, "signal/frontier_coverage_15/centered_abs_mean": 0.15460915565490724, "signal/frontier_coverage_15/group_std_mean": 0.20132783949375152, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03233877532184124, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022109109442681072, "signal/frontier_coverage_20/centered_abs_mean": 0.07516934722661972, "signal/frontier_coverage_20/group_std_mean": 0.09350252896547318, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015893686562776566, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010749216424301266, "signal/frontier_coverage_25/centered_abs_mean": 0.11685722768306732, "signal/frontier_coverage_25/group_std_mean": 0.14719865024089812, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024771924316883086, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016710583819076418, "signal/frontier_coverage_5/centered_abs_mean": 0.1731052041053772, "signal/frontier_coverage_5/group_std_mean": 0.22561621367931367, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0362217016518116, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024754045065492392, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33898064494132996, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40483956336975097, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.500599205493927, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03389806374907493, "step": 190 }, { "calibration/aurc": 0.16990665201284244, "calibration/batch_distribution_entropy": 0.9763861379418843, "calibration/buffer_distribution_entropy": 0.9756461996279331, "calibration/confidence_entropy": 0.4907997092983952, "calibration/coverage@0%": 0.04279541816108065, "calibration/coverage@1%": 0.04279541816108065, "calibration/coverage@10%": 0.3558397094996633, "calibration/coverage@15%": 0.48278590372538516, "calibration/coverage@20%": 0.6133848323851534, "calibration/coverage@25%": 0.8433561312198542, "calibration/coverage@30%": 0.8935762652705062, "calibration/coverage@5%": 0.19339417144192056, "calibration/ece": 0.1626894563658173, "calibration/mean_confidence": 0.5491100531123216, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0057291666666666515, "completions/max_length": 3678.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 815.7873168945313, "completions/mean_terminated_length": 820.5162841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 215.8, "epoch": 0.46799415007312406, "grad_norm": 0.0038923481479287148, "learning_rate": 3.91566265060241e-07, "loss": -0.0136, "num_tokens": 415130718.0, "reward": 0.993002200126648, "reward_std": 0.11667114496231079, "rewards/accuracy_reward": 0.6849826335906982, "rewards/brier_reward": 0.8041805863380432, "rewards/confidence_uniqueness_reward": 0.9449598073959351, "rewards/format_reward": 0.9940104246139526, "rewards/frontier_coverage_0": 0.023747061751782893, "rewards/frontier_coverage_1": 0.023747061751782893, "rewards/frontier_coverage_10": 0.023766111955046652, "rewards/frontier_coverage_15": 0.027177707105875016, "rewards/frontier_coverage_20": 0.07301479429006577, "rewards/frontier_coverage_25": 0.1472606360912323, "rewards/frontier_coverage_5": 0.023747061751782893, "rewards/frontier_entropy_batch_reward": -0.26305546462535856, "signal/accuracy_reward/centered_abs_mean": 0.14096679538488388, "signal/accuracy_reward/group_std_mean": 0.19119617640972136, "signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9744200944900513, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07048339769244194, "signal/advantage_abs_mean": 0.7451924324035645, "signal/advantage_pre_scale_abs_mean": 0.08635586649179458, "signal/advantage_pre_scale_std": 0.13891534209251405, "signal/advantage_std": 0.9830895900726319, "signal/brier_reward/centered_abs_mean": 0.13546755462884902, "signal/brier_reward/group_std_mean": 0.17547394037246705, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18838207721710204, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01354675628244877, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02242642156779766, "signal/confidence_uniqueness_reward/group_std_mean": 0.03652404025197029, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031294023245573045, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022426421754062177, "signal/format_reward/centered_abs_mean": 0.010574001539498567, "signal/format_reward/group_std_mean": 0.022073457762598992, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07329605147242546, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0052870007697492834, "signal/frontier_coverage_0/centered_abs_mean": 0.1862773597240448, "signal/frontier_coverage_0/group_std_mean": 0.24120102524757386, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03693324699997902, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026637662667781115, "signal/frontier_coverage_1/centered_abs_mean": 0.1862773597240448, "signal/frontier_coverage_1/group_std_mean": 0.24120102524757386, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03693324699997902, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026637662667781115, "signal/frontier_coverage_10/centered_abs_mean": 0.1861777275800705, "signal/frontier_coverage_10/group_std_mean": 0.2410757929086685, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.036913507431745526, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026623413898050783, "signal/frontier_coverage_15/centered_abs_mean": 0.1608198195695877, "signal/frontier_coverage_15/group_std_mean": 0.20852963030338287, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.031917137652635576, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022997234016656877, "signal/frontier_coverage_20/centered_abs_mean": 0.07476909160614013, "signal/frontier_coverage_20/group_std_mean": 0.09405903220176696, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014968187920749188, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010691980132833123, "signal/frontier_coverage_25/centered_abs_mean": 0.1143454447388649, "signal/frontier_coverage_25/group_std_mean": 0.145367094874382, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022907671332359315, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00163513976149261, "signal/frontier_coverage_5/centered_abs_mean": 0.1862773597240448, "signal/frontier_coverage_5/group_std_mean": 0.24120102524757386, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03693324699997902, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026637662667781115, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31905388832092285, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39239723086357114, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44702168107032775, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031905388832092284, "step": 195 }, { "calibration/aurc": 0.14559693191858122, "calibration/batch_distribution_entropy": 0.9603313286311146, "calibration/buffer_distribution_entropy": 0.9761309450014393, "calibration/confidence_entropy": 0.5010567276644734, "calibration/coverage@0%": 0.016820017762200986, "calibration/coverage@1%": 0.016820017762200986, "calibration/coverage@10%": 0.3913710631099086, "calibration/coverage@15%": 0.6379543175606383, "calibration/coverage@20%": 0.8828092626318101, "calibration/coverage@25%": 0.9403797520265744, "calibration/coverage@30%": 0.9712192254589695, "calibration/coverage@5%": 0.14037182782463625, "calibration/ece": 0.1773901327159586, "calibration/mean_confidence": 0.5998158055084197, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006944444444444442, "completions/max_length": 3106.8, "completions/max_terminated_length": 3106.8, "completions/mean_length": 786.4341918945313, "completions/mean_terminated_length": 791.9395629882813, "completions/min_length": 0.0, "completions/min_terminated_length": 230.2, "epoch": 0.47999400007499904, "grad_norm": 0.0037775628734380007, "learning_rate": 2.409638554216868e-07, "loss": -0.0112, "num_tokens": 427258248.0, "reward": 0.9977959275245667, "reward_std": 0.1135590761899948, "rewards/accuracy_reward": 0.692968738079071, "rewards/brier_reward": 0.8162111163139343, "rewards/confidence_uniqueness_reward": 0.9434576988220215, "rewards/format_reward": 0.9928819417953492, "rewards/frontier_coverage_0": 0.03058276418596506, "rewards/frontier_coverage_1": 0.03058276418596506, "rewards/frontier_coverage_10": 0.030608633439987896, "rewards/frontier_coverage_15": 0.034898260794579986, "rewards/frontier_coverage_20": 0.0822305366396904, "rewards/frontier_coverage_25": 0.1586170792579651, "rewards/frontier_coverage_5": 0.03058276418596506, "rewards/frontier_entropy_batch_reward": -0.26789160668849943, "signal/accuracy_reward/centered_abs_mean": 0.13543294370174408, "signal/accuracy_reward/group_std_mean": 0.1795397073030472, "signal/accuracy_reward/group_zero_std_frac": 0.4833333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9879457116127014, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06771647185087204, "signal/advantage_abs_mean": 0.7566372156143188, "signal/advantage_pre_scale_abs_mean": 0.08582200407981873, "signal/advantage_pre_scale_std": 0.13774674534797668, "signal/advantage_std": 0.9830132961273194, "signal/brier_reward/centered_abs_mean": 0.12440891414880753, "signal/brier_reward/group_std_mean": 0.16094110310077667, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1832536369562149, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012440891563892364, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023261058330535888, "signal/confidence_uniqueness_reward/group_std_mean": 0.0364607434719801, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0340243112295866, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002326105860993266, "signal/format_reward/centered_abs_mean": 0.011458333395421506, "signal/format_reward/group_std_mean": 0.021999170631170274, "signal/format_reward/group_zero_std_frac": 0.9027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08310093134641647, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005729166697710753, "signal/frontier_coverage_0/centered_abs_mean": 0.1727170765399933, "signal/frontier_coverage_0/group_std_mean": 0.22679380774497987, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03630736693739891, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002469854103401303, "signal/frontier_coverage_1/centered_abs_mean": 0.1727170765399933, "signal/frontier_coverage_1/group_std_mean": 0.22679380774497987, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03630736693739891, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002469854103401303, "signal/frontier_coverage_10/centered_abs_mean": 0.17263288795948029, "signal/frontier_coverage_10/group_std_mean": 0.22668661475181578, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03628960847854614, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024686503689736127, "signal/frontier_coverage_15/centered_abs_mean": 0.13041841089725495, "signal/frontier_coverage_15/group_std_mean": 0.172030445933342, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027530809864401817, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018649833044037222, "signal/frontier_coverage_20/centered_abs_mean": 0.07613101899623871, "signal/frontier_coverage_20/group_std_mean": 0.09484113454818725, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016059026680886747, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010886735515668988, "signal/frontier_coverage_25/centered_abs_mean": 0.11801900565624238, "signal/frontier_coverage_25/group_std_mean": 0.14812451601028442, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024889787659049034, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016876716865226627, "signal/frontier_coverage_5/centered_abs_mean": 0.1727170765399933, "signal/frontier_coverage_5/group_std_mean": 0.22679380774497987, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03630736693739891, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002469854103401303, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31580972075462344, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38427644968032837, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46509563326835635, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03158097080886364, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.16057820298915118, "eval_calibration/batch_distribution_entropy": 0.914145618708674, "eval_calibration/buffer_distribution_entropy": 0.9753044494695922, "eval_calibration/confidence_entropy": 0.4683044711172521, "eval_calibration/coverage@0%": 0.22395833333333334, "eval_calibration/coverage@1%": 0.22395833333333334, "eval_calibration/coverage@10%": 0.5677083333333334, "eval_calibration/coverage@15%": 0.721606182795699, "eval_calibration/coverage@20%": 0.81065188172043, "eval_calibration/coverage@25%": 0.873991935483871, "eval_calibration/coverage@30%": 0.947244623655914, "eval_calibration/coverage@5%": 0.2708333333333333, "eval_calibration/ece": 0.2455773573190524, "eval_calibration/mean_confidence": 0.5615903357669692, "eval_completions/clipped_ratio": 0.002604166666666685, "eval_completions/max_length": 2298.3333333333335, "eval_completions/max_terminated_length": 2298.3333333333335, "eval_completions/mean_length": 798.5604349772135, "eval_completions/mean_terminated_length": 800.6705627441406, "eval_completions/min_length": 134.0, "eval_completions/min_terminated_length": 276.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 427258248.0, "eval_reward": 0.9212540686130524, "eval_reward_std": 0.2215366984407107, "eval_rewards/accuracy_reward": 0.6866319378217062, "eval_rewards/brier_reward": 0.8282056351502737, "eval_rewards/confidence_uniqueness_reward": 0.8917240798473358, "eval_rewards/format_reward": 0.9973958432674408, "eval_rewards/frontier_coverage_0": 0.047293830662965775, "eval_rewards/frontier_coverage_1": 0.047293830662965775, "eval_rewards/frontier_coverage_10": 0.04729795269668102, "eval_rewards/frontier_coverage_15": 0.04440750305851301, "eval_rewards/frontier_coverage_20": 0.08812103296319644, "eval_rewards/frontier_coverage_25": 0.16687769691149393, "eval_rewards/frontier_coverage_5": 0.047292555992801986, "eval_rewards/frontier_entropy_batch_reward": -0.9973958432674408, "eval_runtime": 166.4081, "eval_samples_per_second": 6.009, "eval_signal/accuracy_reward/centered_abs_mean": 0.4191080729166667, "eval_signal/accuracy_reward/group_std_mean": 0.4639366815487544, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9535810748736063, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20955403645833334, "eval_signal/advantage_abs_mean": 0.8791789809862772, "eval_signal/advantage_pre_scale_abs_mean": 0.1954043780763944, "eval_signal/advantage_pre_scale_std": 0.21952204157908758, "eval_signal/advantage_std": 0.9863749047120413, "eval_signal/brier_reward/centered_abs_mean": 0.16421432544787726, "eval_signal/brier_reward/group_std_mean": 0.22416182110706964, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0745612805088361, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.016421433072537184, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04440143456061681, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06044746252397696, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020217653984824818, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004440143549193938, "eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/format_reward/group_std_mean": 0.014731391333043575, "eval_signal/format_reward/group_zero_std_frac": 0.9166666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011211627162992954, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.296345055103302, "eval_signal/frontier_coverage_0/group_std_mean": 0.4005911747614543, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01929074029127757, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004237734169388811, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.296345055103302, "eval_signal/frontier_coverage_1/group_std_mean": 0.4005911747614543, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01929074029127757, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004237734169388811, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.296173761288325, "eval_signal/frontier_coverage_10/group_std_mean": 0.40038461486498517, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0192795991897583, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004235284713407357, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.17665722717841467, "eval_signal/frontier_coverage_15/group_std_mean": 0.24827261020739874, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01150304094577829, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025261982421701155, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.10272979860504468, "eval_signal/frontier_coverage_20/group_std_mean": 0.13188674176732698, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006687632451454799, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014690360403619707, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.19587110231320062, "eval_signal/frontier_coverage_25/group_std_mean": 0.24194891502459845, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01274662526945273, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002800956523666779, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.29633869727452594, "eval_signal/frontier_coverage_5/group_std_mean": 0.40058427552382153, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019290315608183544, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004237643443048, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0022423254946867623, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045572955471774, "eval_steps_per_second": 0.036, "step": 200 }, { "calibration/aurc": 0.1186967407697009, "calibration/batch_distribution_entropy": 0.9550775718290982, "calibration/buffer_distribution_entropy": 0.9759632840081753, "calibration/confidence_entropy": 0.47419539528335974, "calibration/coverage@0%": 0.026565226876090753, "calibration/coverage@1%": 0.026565226876090753, "calibration/coverage@10%": 0.4854984729493892, "calibration/coverage@15%": 0.8464511950409872, "calibration/coverage@20%": 0.9174506613422097, "calibration/coverage@25%": 0.9550392670157068, "calibration/coverage@30%": 0.9853403141361257, "calibration/coverage@5%": 0.3022660340314136, "calibration/ece": 0.18733712170138508, "calibration/mean_confidence": 0.6067382862272922, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004947916666666652, "completions/max_length": 3626.8, "completions/max_terminated_length": 3626.8, "completions/mean_length": 804.612939453125, "completions/mean_terminated_length": 808.594140625, "completions/min_length": 0.0, "completions/min_terminated_length": 237.4, "epoch": 0.491993850076874, "grad_norm": 0.0037702268455177546, "learning_rate": 9.036144578313253e-08, "loss": -0.0034, "num_tokens": 439593341.0, "reward": 1.0242285490036012, "reward_std": 0.11114266216754913, "rewards/accuracy_reward": 0.7535590410232544, "rewards/brier_reward": 0.8131016492843628, "rewards/confidence_uniqueness_reward": 0.94401034116745, "rewards/format_reward": 0.9950520753860473, "rewards/frontier_coverage_0": -0.009763723891228437, "rewards/frontier_coverage_1": -0.009763723891228437, "rewards/frontier_coverage_10": -0.00971116297878325, "rewards/frontier_coverage_15": 0.01904887929558754, "rewards/frontier_coverage_20": 0.0988022267818451, "rewards/frontier_coverage_25": 0.18960395753383635, "rewards/frontier_coverage_5": -0.0097591457888484, "rewards/frontier_entropy_batch_reward": -0.29627181887626647, "signal/accuracy_reward/centered_abs_mean": 0.13992513120174407, "signal/accuracy_reward/group_std_mean": 0.18550328016281128, "signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0376464486122132, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06996256560087204, "signal/advantage_abs_mean": 0.7619848847389221, "signal/advantage_pre_scale_abs_mean": 0.08387549370527267, "signal/advantage_pre_scale_std": 0.1350790113210678, "signal/advantage_std": 0.9830043315887451, "signal/brier_reward/centered_abs_mean": 0.129868845641613, "signal/brier_reward/group_std_mean": 0.1658725470304489, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1923790842294693, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012986884266138077, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02169004678726196, "signal/confidence_uniqueness_reward/group_std_mean": 0.03326268345117569, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03214513845741749, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002169004688039422, "signal/format_reward/centered_abs_mean": 0.008241102285683154, "signal/format_reward/group_std_mean": 0.01677692960947752, "signal/format_reward/group_zero_std_frac": 0.9250000238418579, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06075965389609337, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004120551142841577, "signal/frontier_coverage_0/centered_abs_mean": 0.18534817099571227, "signal/frontier_coverage_0/group_std_mean": 0.23780874013900757, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039334161579608916, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026504788547754288, "signal/frontier_coverage_1/centered_abs_mean": 0.18534817099571227, "signal/frontier_coverage_1/group_std_mean": 0.23780874013900757, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039334161579608916, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026504788547754288, "signal/frontier_coverage_10/centered_abs_mean": 0.1850076824426651, "signal/frontier_coverage_10/group_std_mean": 0.23738227784633636, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039263205230236055, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026456098072230815, "signal/frontier_coverage_15/centered_abs_mean": 0.09948588758707047, "signal/frontier_coverage_15/group_std_mean": 0.12873928546905516, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021109068393707277, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014226482482627035, "signal/frontier_coverage_20/centered_abs_mean": 0.08255493640899658, "signal/frontier_coverage_20/group_std_mean": 0.10234281718730927, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01751541830599308, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011805356247350574, "signal/frontier_coverage_25/centered_abs_mean": 0.12076869606971741, "signal/frontier_coverage_25/group_std_mean": 0.15139889121055602, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025616540387272835, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017269923351705073, "signal/frontier_coverage_5/centered_abs_mean": 0.18533942103385925, "signal/frontier_coverage_5/group_std_mean": 0.2377980649471283, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03933229818940163, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002650353778153658, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3371506452560425, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40471735000610354, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5003003001213073, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03371506631374359, "step": 205 }, { "calibration/aurc": 0.1272282988853613, "calibration/batch_distribution_entropy": 0.9597042176053079, "calibration/buffer_distribution_entropy": 0.9764644981690312, "calibration/confidence_entropy": 0.4988459667423297, "calibration/coverage@0%": 0.03220648332796521, "calibration/coverage@1%": 0.03220648332796521, "calibration/coverage@10%": 0.42679129860725634, "calibration/coverage@15%": 0.716446344990166, "calibration/coverage@20%": 0.8498751624715318, "calibration/coverage@25%": 0.9440389794617708, "calibration/coverage@30%": 0.9772528433945756, "calibration/coverage@5%": 0.08715098602230857, "calibration/ece": 0.13103508682212492, "calibration/mean_confidence": 0.6140400005582972, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0028935185185185266, "completions/max_length": 3365.0, "completions/max_terminated_length": 3365.0, "completions/mean_length": 805.1265869140625, "completions/mean_terminated_length": 807.4395955403646, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.49919376007799904, "num_tokens": 447024424.0, "reward": 1.0055522521336873, "reward_std": 0.10900399088859558, "rewards/accuracy_reward": 0.7063078681627909, "rewards/brier_reward": 0.8075371583302816, "rewards/confidence_uniqueness_reward": 0.9471040964126587, "rewards/format_reward": 0.9971064925193787, "rewards/frontier_coverage_0": 0.008651394241799911, "rewards/frontier_coverage_1": 0.008651394241799911, "rewards/frontier_coverage_10": 0.008720822011431059, "rewards/frontier_coverage_15": 0.03024888038635254, "rewards/frontier_coverage_20": 0.09493551154931386, "rewards/frontier_coverage_25": 0.173322523633639, "rewards/frontier_coverage_5": 0.008652187573413054, "rewards/frontier_entropy_batch_reward": -0.2638363142808278, "signal/accuracy_reward/centered_abs_mean": 0.1340241606036822, "signal/accuracy_reward/group_std_mean": 0.18462320665518442, "signal/accuracy_reward/group_zero_std_frac": 0.4583333333333333, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0006801684697468, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0670120803018411, "signal/advantage_abs_mean": 0.7406850457191467, "signal/advantage_pre_scale_abs_mean": 0.07968033105134964, "signal/advantage_pre_scale_std": 0.12958685557047525, "signal/advantage_std": 0.9829863905906677, "signal/brier_reward/centered_abs_mean": 0.12960121283928552, "signal/brier_reward/group_std_mean": 0.16745843489964804, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19411064187685648, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012960121346016725, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01816164267559846, "signal/confidence_uniqueness_reward/group_std_mean": 0.02970569891234239, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027298261721928913, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001816164197710653, "signal/format_reward/centered_abs_mean": 0.0055157696673025685, "signal/format_reward/group_std_mean": 0.014287550002336502, "signal/format_reward/group_zero_std_frac": 0.9259259502092997, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04154850294192632, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0027578848336512842, "signal/frontier_coverage_0/centered_abs_mean": 0.17834581434726715, "signal/frontier_coverage_0/group_std_mean": 0.23521957298119864, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038279421627521515, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00255034522463878, "signal/frontier_coverage_1/centered_abs_mean": 0.17834581434726715, "signal/frontier_coverage_1/group_std_mean": 0.23521957298119864, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038279421627521515, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00255034522463878, "signal/frontier_coverage_10/centered_abs_mean": 0.17744634052117667, "signal/frontier_coverage_10/group_std_mean": 0.23404847085475922, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.038087598979473114, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025374825733403363, "signal/frontier_coverage_15/centered_abs_mean": 0.07600981990496318, "signal/frontier_coverage_15/group_std_mean": 0.099979134897391, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016342710082729656, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010869404068216681, "signal/frontier_coverage_20/centered_abs_mean": 0.08222619444131851, "signal/frontier_coverage_20/group_std_mean": 0.10303841282924016, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01762464890877406, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011758345644921064, "signal/frontier_coverage_25/centered_abs_mean": 0.12105090419451396, "signal/frontier_coverage_25/group_std_mean": 0.15320136646429697, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02592242571214835, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017310279266287882, "signal/frontier_coverage_5/centered_abs_mean": 0.17833813031514487, "signal/frontier_coverage_5/group_std_mean": 0.23520942529042563, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0382777601480484, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002550235173354546, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3061721622943878, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37579457958539325, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4601670801639557, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03061721660196781, "step": 208, "total_flos": 0.0, "train_loss": -0.022534527126341485, "train_runtime": 39191.2693, "train_samples_per_second": 0.383, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 447024424, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }