{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.49858621709600043, "calibration/batch_distribution_entropy": 0.27179949345286947, "calibration/buffer_distribution_entropy": 0.2826936735263452, "calibration/confidence_entropy": 0.22057572312827042, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.46227961186189803, "calibration/mean_confidence": 0.9144884743892769, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02005208333333335, "completions/max_length": 3998.4, "completions/max_terminated_length": 3998.4, "completions/mean_length": 516.7477416992188, "completions/mean_terminated_length": 527.3296142578125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.003858975600451231, "learning_rate": 5.952380952380953e-07, "loss": 0.0068, "num_tokens": 9067142.0, "reward": 0.5035168766975403, "reward_std": 0.44194251894950864, "rewards/accuracy_reward": 0.25572916567325593, "rewards/brier_reward": 0.3094047784805298, "rewards/confidence_uniqueness_reward": 0.28810680508613584, "rewards/format_reward": 0.5986111044883728, "rewards/frontier_coverage_0": 0.16578977052122354, "rewards/frontier_coverage_1": 0.16578977052122354, "rewards/frontier_coverage_10": 0.16578977052122354, "rewards/frontier_coverage_15": 0.16578977052122354, "rewards/frontier_coverage_20": 0.16578977052122354, "rewards/frontier_coverage_25": 0.16578977052122354, "rewards/frontier_coverage_5": 0.16578977052122354, "signal/accuracy_reward/centered_abs_mean": 0.3037217855453491, "signal/accuracy_reward/group_std_mean": 0.3625373482704163, "signal/accuracy_reward/group_zero_std_frac": 0.10555555745959282, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15186089277267456, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15186089277267456, "signal/advantage_abs_mean": 0.383181232213974, "signal/advantage_pre_scale_abs_mean": 0.383181232213974, "signal/advantage_pre_scale_std": 0.4454788088798523, "signal/advantage_std": 0.4454788088798523, "signal/brier_reward/centered_abs_mean": 0.31622138023376467, "signal/brier_reward/group_std_mean": 0.36951943635940554, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03162213787436485, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03162213787436485, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.237173992395401, "signal/confidence_uniqueness_reward/group_std_mean": 0.2888317406177521, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023717399314045907, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023717399314045907, "signal/format_reward/centered_abs_mean": 0.43920356035232544, "signal/format_reward/group_std_mean": 0.4745051324367523, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21960178017616272, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21960178017616272, "signal/frontier_coverage_0/centered_abs_mean": 0.19068164750933647, "signal/frontier_coverage_0/group_std_mean": 0.23141059912741185, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_1/centered_abs_mean": 0.19068164750933647, "signal/frontier_coverage_1/group_std_mean": 0.23141059912741185, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_10/centered_abs_mean": 0.19068164750933647, "signal/frontier_coverage_10/group_std_mean": 0.23141059912741185, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_15/centered_abs_mean": 0.19068164750933647, "signal/frontier_coverage_15/group_std_mean": 0.23141059912741185, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_20/centered_abs_mean": 0.19068164750933647, "signal/frontier_coverage_20/group_std_mean": 0.23141059912741185, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_25/centered_abs_mean": 0.19068164750933647, "signal/frontier_coverage_25/group_std_mean": 0.23141059912741185, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_5/centered_abs_mean": 0.19068164750933647, "signal/frontier_coverage_5/group_std_mean": 0.23141059912741185, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027267476194538175, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027267476194538175, "step": 5 }, { "calibration/aurc": 0.5048620587670756, "calibration/batch_distribution_entropy": 0.23936490089336626, "calibration/buffer_distribution_entropy": 0.276244326153706, "calibration/confidence_entropy": 0.21518765364765557, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4767879771080269, "calibration/mean_confidence": 0.9237536204358829, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01918402777777779, "completions/max_length": 3910.2, "completions/max_terminated_length": 3910.2, "completions/mean_length": 477.1962585449219, "completions/mean_terminated_length": 486.66339721679685, "completions/min_length": 0.0, "completions/min_terminated_length": 21.2, "epoch": 0.023999700003749954, "grad_norm": 0.03838086128234863, "learning_rate": 1.1904761904761906e-06, "loss": 0.0025, "num_tokens": 17647163.0, "reward": 0.5732499718666076, "reward_std": 0.39466784000396726, "rewards/accuracy_reward": 0.290625, "rewards/brier_reward": 0.35456337332725524, "rewards/confidence_uniqueness_reward": 0.35486308932304383, "rewards/format_reward": 0.7128472208976746, "rewards/frontier_coverage_0": 0.00570630207657814, "rewards/frontier_coverage_1": 0.00570630207657814, "rewards/frontier_coverage_10": 0.00570630207657814, "rewards/frontier_coverage_15": 0.00570630207657814, "rewards/frontier_coverage_20": 0.00570630207657814, "rewards/frontier_coverage_25": 0.00570630207657814, "rewards/frontier_coverage_5": 0.00570630207657814, "signal/accuracy_reward/centered_abs_mean": 0.32489149570465087, "signal/accuracy_reward/group_std_mean": 0.3819944679737091, "signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16244574785232543, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16244574785232543, "signal/advantage_abs_mean": 0.32642056941986086, "signal/advantage_pre_scale_abs_mean": 0.32642056941986086, "signal/advantage_pre_scale_std": 0.3967562675476074, "signal/advantage_std": 0.3967562675476074, "signal/brier_reward/centered_abs_mean": 0.32057392597198486, "signal/brier_reward/group_std_mean": 0.3732548654079437, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032057393342256546, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.032057393342256546, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22309686243534088, "signal/confidence_uniqueness_reward/group_std_mean": 0.2798730194568634, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022309686988592148, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022309686988592148, "signal/format_reward/centered_abs_mean": 0.3567274272441864, "signal/format_reward/group_std_mean": 0.42118590474128725, "signal/format_reward/group_zero_std_frac": 0.00555555559694767, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1783637136220932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1783637136220932, "signal/frontier_coverage_0/centered_abs_mean": 0.015685518644750117, "signal/frontier_coverage_0/group_std_mean": 0.0333976186811924, "signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_1/centered_abs_mean": 0.015685518644750117, "signal/frontier_coverage_1/group_std_mean": 0.0333976186811924, "signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_10/centered_abs_mean": 0.015685518644750117, "signal/frontier_coverage_10/group_std_mean": 0.0333976186811924, "signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_15/centered_abs_mean": 0.015685518644750117, "signal/frontier_coverage_15/group_std_mean": 0.0333976186811924, "signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_20/centered_abs_mean": 0.015685518644750117, "signal/frontier_coverage_20/group_std_mean": 0.0333976186811924, "signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_25/centered_abs_mean": 0.015685518644750117, "signal/frontier_coverage_25/group_std_mean": 0.0333976186811924, "signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_5/centered_abs_mean": 0.015685518644750117, "signal/frontier_coverage_5/group_std_mean": 0.0333976186811924, "signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00022430291573982686, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00022430291573982686, "step": 10 }, { "calibration/aurc": 0.5651461697219853, "calibration/batch_distribution_entropy": 0.2858721407601498, "calibration/buffer_distribution_entropy": 0.26435241204989707, "calibration/confidence_entropy": 0.23515916341087234, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5243550099379826, "calibration/mean_confidence": 0.9145028922999036, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011197916666666674, "completions/max_length": 3814.8, "completions/max_terminated_length": 3814.8, "completions/mean_length": 415.96303100585936, "completions/mean_terminated_length": 420.7157958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 60.6, "epoch": 0.03599955000562493, "grad_norm": 0.001462470623664558, "learning_rate": 1.7857142857142859e-06, "loss": -0.0064, "num_tokens": 25541041.0, "reward": 0.7139440774917603, "reward_std": 0.2866878867149353, "rewards/accuracy_reward": 0.3111111164093018, "rewards/brier_reward": 0.4146228313446045, "rewards/confidence_uniqueness_reward": 0.5058956265449523, "rewards/format_reward": 0.9306423544883728, "rewards/frontier_coverage_0": 0.010144511703401804, "rewards/frontier_coverage_1": 0.010144511703401804, "rewards/frontier_coverage_10": 0.010144511703401804, "rewards/frontier_coverage_15": 0.010144511703401804, "rewards/frontier_coverage_20": 0.010144511703401804, "rewards/frontier_coverage_25": 0.010144511703401804, "rewards/frontier_coverage_5": 0.010144511703401804, "signal/accuracy_reward/centered_abs_mean": 0.3240451455116272, "signal/accuracy_reward/group_std_mean": 0.3799292385578156, "signal/accuracy_reward/group_zero_std_frac": 0.09444444701075554, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1620225727558136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1620225727558136, "signal/advantage_abs_mean": 0.22506832480430602, "signal/advantage_pre_scale_abs_mean": 0.22506832480430602, "signal/advantage_pre_scale_std": 0.29292616844177244, "signal/advantage_std": 0.29292616844177244, "signal/brier_reward/centered_abs_mean": 0.30433117747306826, "signal/brier_reward/group_std_mean": 0.3543997764587402, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030433119088411332, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030433119088411332, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17291399836540222, "signal/confidence_uniqueness_reward/group_std_mean": 0.22697044014930726, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01729139983654022, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01729139983654022, "signal/format_reward/centered_abs_mean": 0.11800672635436057, "signal/format_reward/group_std_mean": 0.20241138935089112, "signal/format_reward/group_zero_std_frac": 0.2583333317190409, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05900336317718029, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.05900336317718029, "signal/frontier_coverage_0/centered_abs_mean": 0.021325640380382538, "signal/frontier_coverage_0/group_std_mean": 0.04250783696770668, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_1/centered_abs_mean": 0.021325640380382538, "signal/frontier_coverage_1/group_std_mean": 0.04250783696770668, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_10/centered_abs_mean": 0.021325640380382538, "signal/frontier_coverage_10/group_std_mean": 0.04250783696770668, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_15/centered_abs_mean": 0.021325640380382538, "signal/frontier_coverage_15/group_std_mean": 0.04250783696770668, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_20/centered_abs_mean": 0.021325640380382538, "signal/frontier_coverage_20/group_std_mean": 0.04250783696770668, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_25/centered_abs_mean": 0.021325640380382538, "signal/frontier_coverage_25/group_std_mean": 0.04250783696770668, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_5/centered_abs_mean": 0.021325640380382538, "signal/frontier_coverage_5/group_std_mean": 0.04250783696770668, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00030495663813780995, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00030495663813780995, "step": 15 }, { "calibration/aurc": 0.4942660369668168, "calibration/batch_distribution_entropy": 0.36107236461197073, "calibration/buffer_distribution_entropy": 0.2866057794747389, "calibration/confidence_entropy": 0.28890118485400956, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.03717277486910995, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4425994634048266, "calibration/mean_confidence": 0.8945259513282633, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111116, "completions/max_length": 3545.2, "completions/max_terminated_length": 3545.2, "completions/mean_length": 423.8654479980469, "completions/mean_terminated_length": 427.660546875, "completions/min_length": 0.0, "completions/min_terminated_length": 68.8, "epoch": 0.04799940000749991, "grad_norm": 0.0008591993246227503, "learning_rate": 2.380952380952381e-06, "loss": -0.0082, "num_tokens": 33537667.0, "reward": 0.8044180393218994, "reward_std": 0.2287308543920517, "rewards/accuracy_reward": 0.4008680522441864, "rewards/brier_reward": 0.5150173962116241, "rewards/confidence_uniqueness_reward": 0.5862200736999512, "rewards/format_reward": 0.9855034828186036, "rewards/frontier_coverage_0": 0.011074092797935009, "rewards/frontier_coverage_1": 0.011074092797935009, "rewards/frontier_coverage_10": 0.011074092797935009, "rewards/frontier_coverage_15": 0.011074092797935009, "rewards/frontier_coverage_20": 0.011074092797935009, "rewards/frontier_coverage_25": 0.011074092797935009, "rewards/frontier_coverage_5": 0.011074092797935009, "signal/accuracy_reward/centered_abs_mean": 0.2977321982383728, "signal/accuracy_reward/group_std_mean": 0.3633489072322845, "signal/accuracy_reward/group_zero_std_frac": 0.08333333432674409, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1488660991191864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1488660991191864, "signal/advantage_abs_mean": 0.18184916377067567, "signal/advantage_pre_scale_abs_mean": 0.18184916377067567, "signal/advantage_pre_scale_std": 0.23659501671791078, "signal/advantage_std": 0.23659501671791078, "signal/brier_reward/centered_abs_mean": 0.26825318336486814, "signal/brier_reward/group_std_mean": 0.32477903366088867, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02682531885802746, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02682531885802746, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.16254269182682038, "signal/confidence_uniqueness_reward/group_std_mean": 0.1992730051279068, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01625426858663559, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01625426858663559, "signal/format_reward/centered_abs_mean": 0.026285807229578496, "signal/format_reward/group_std_mean": 0.05511143393814564, "signal/format_reward/group_zero_std_frac": 0.7555555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013142903614789248, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013142903614789248, "signal/frontier_coverage_0/centered_abs_mean": 0.02683491036295891, "signal/frontier_coverage_0/group_std_mean": 0.04888941571116447, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_1/centered_abs_mean": 0.02683491036295891, "signal/frontier_coverage_1/group_std_mean": 0.04888941571116447, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_10/centered_abs_mean": 0.02683491036295891, "signal/frontier_coverage_10/group_std_mean": 0.04888941571116447, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_15/centered_abs_mean": 0.02683491036295891, "signal/frontier_coverage_15/group_std_mean": 0.04888941571116447, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_20/centered_abs_mean": 0.02683491036295891, "signal/frontier_coverage_20/group_std_mean": 0.04888941571116447, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_25/centered_abs_mean": 0.02683491036295891, "signal/frontier_coverage_25/group_std_mean": 0.04888941571116447, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_5/centered_abs_mean": 0.02683491036295891, "signal/frontier_coverage_5/group_std_mean": 0.04888941571116447, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0003837391850538552, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0003837391850538552, "step": 20 }, { "calibration/aurc": 0.39448058005388165, "calibration/batch_distribution_entropy": 0.4518564097762634, "calibration/buffer_distribution_entropy": 0.3448626963299991, "calibration/confidence_entropy": 0.3213623682330117, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.016753926701570682, "calibration/coverage@20%": 0.019895287958115182, "calibration/coverage@25%": 0.12198952879581151, "calibration/coverage@30%": 0.18481675392670155, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3243279868719355, "calibration/mean_confidence": 0.8755994903434999, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007204861111111116, "completions/max_length": 3844.8, "completions/max_terminated_length": 3844.8, "completions/mean_length": 468.8833435058594, "completions/mean_terminated_length": 472.284619140625, "completions/min_length": 0.0, "completions/min_terminated_length": 99.2, "epoch": 0.05999925000937488, "grad_norm": 0.0007930905558168888, "learning_rate": 2.9761904761904763e-06, "loss": -0.0046, "num_tokens": 42063651.0, "reward": 0.8718119025230407, "reward_std": 0.2136448562145233, "rewards/accuracy_reward": 0.4987847149372101, "rewards/brier_reward": 0.6132471799850464, "rewards/confidence_uniqueness_reward": 0.6505587697029114, "rewards/format_reward": 0.9907118201255798, "rewards/frontier_coverage_0": 0.00682337733451277, "rewards/frontier_coverage_1": 0.00682337733451277, "rewards/frontier_coverage_10": 0.00682337733451277, "rewards/frontier_coverage_15": 0.00682337733451277, "rewards/frontier_coverage_20": 0.00682337733451277, "rewards/frontier_coverage_25": 0.00682337733451277, "rewards/frontier_coverage_5": 0.00682337733451277, "signal/accuracy_reward/centered_abs_mean": 0.28725042939186096, "signal/accuracy_reward/group_std_mean": 0.35440042018890383, "signal/accuracy_reward/group_zero_std_frac": 0.08888889104127884, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14362521469593048, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14362521469593048, "signal/advantage_abs_mean": 0.16881968677043915, "signal/advantage_pre_scale_abs_mean": 0.16881968677043915, "signal/advantage_pre_scale_std": 0.22591695785522461, "signal/advantage_std": 0.22591695785522461, "signal/brier_reward/centered_abs_mean": 0.23450190126895903, "signal/brier_reward/group_std_mean": 0.28934155106544496, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02345018908381462, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02345018908381462, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1386233687400818, "signal/confidence_uniqueness_reward/group_std_mean": 0.16758487224578858, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013862336613237857, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013862336613237857, "signal/format_reward/centered_abs_mean": 0.017116970755159854, "signal/format_reward/group_std_mean": 0.038513346761465075, "signal/format_reward/group_zero_std_frac": 0.819444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008558485377579927, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008558485377579927, "signal/frontier_coverage_0/centered_abs_mean": 0.030816724896430968, "signal/frontier_coverage_0/group_std_mean": 0.05202222615480423, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_1/centered_abs_mean": 0.030816724896430968, "signal/frontier_coverage_1/group_std_mean": 0.05202222615480423, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_10/centered_abs_mean": 0.030816724896430968, "signal/frontier_coverage_10/group_std_mean": 0.05202222615480423, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_15/centered_abs_mean": 0.030816724896430968, "signal/frontier_coverage_15/group_std_mean": 0.05202222615480423, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_20/centered_abs_mean": 0.030816724896430968, "signal/frontier_coverage_20/group_std_mean": 0.05202222615480423, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_25/centered_abs_mean": 0.030816724896430968, "signal/frontier_coverage_25/group_std_mean": 0.05202222615480423, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_5/centered_abs_mean": 0.030816724896430968, "signal/frontier_coverage_5/group_std_mean": 0.05202222615480423, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00044067916460335257, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00044067916460335257, "step": 25 }, { "calibration/aurc": 0.28599355007968147, "calibration/batch_distribution_entropy": 0.5729950289638938, "calibration/buffer_distribution_entropy": 0.4415618909140395, "calibration/confidence_entropy": 0.39785220307742414, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.013123359580052493, "calibration/coverage@15%": 0.020935859580052493, "calibration/coverage@20%": 0.16300624246293538, "calibration/coverage@25%": 0.3446692071008016, "calibration/coverage@30%": 0.5385029855643044, "calibration/coverage@5%": 0.013123359580052493, "calibration/ece": 0.18942492818507478, "calibration/mean_confidence": 0.8361121732549478, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013020833333333325, "completions/max_length": 3944.4, "completions/max_terminated_length": 3944.4, "completions/mean_length": 559.3470703125, "completions/mean_terminated_length": 566.7720703125, "completions/min_length": 0.0, "completions/min_terminated_length": 122.8, "epoch": 0.07199910001124986, "grad_norm": 0.0005365905235521495, "learning_rate": 3.5714285714285718e-06, "loss": -0.0078, "num_tokens": 51617249.0, "reward": 0.9077984690666199, "reward_std": 0.19471972584724426, "rewards/accuracy_reward": 0.56171875, "rewards/brier_reward": 0.6795339226722718, "rewards/confidence_uniqueness_reward": 0.6588276028633118, "rewards/format_reward": 0.9855034708976745, "rewards/frontier_coverage_0": 0.003508235071785748, "rewards/frontier_coverage_1": 0.003508235071785748, "rewards/frontier_coverage_10": 0.003508235071785748, "rewards/frontier_coverage_15": 0.003508235071785748, "rewards/frontier_coverage_20": 0.003508235071785748, "rewards/frontier_coverage_25": 0.003508235071785748, "rewards/frontier_coverage_5": 0.003508235071785748, "signal/accuracy_reward/centered_abs_mean": 0.24979926645755768, "signal/accuracy_reward/group_std_mean": 0.31267011165618896, "signal/accuracy_reward/group_zero_std_frac": 0.17500000447034836, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12489963322877884, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12489963322877884, "signal/advantage_abs_mean": 0.15053375214338302, "signal/advantage_pre_scale_abs_mean": 0.15053375214338302, "signal/advantage_pre_scale_std": 0.21564119458198547, "signal/advantage_std": 0.21564119458198547, "signal/brier_reward/centered_abs_mean": 0.1923845499753952, "signal/brier_reward/group_std_mean": 0.24107061624526976, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019238455034792424, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019238455034792424, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.15954148322343825, "signal/confidence_uniqueness_reward/group_std_mean": 0.19074728488922119, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01595414914190769, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01595414914190769, "signal/format_reward/centered_abs_mean": 0.02523328997194767, "signal/format_reward/group_std_mean": 0.05074257925152779, "signal/format_reward/group_zero_std_frac": 0.7777777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012616644985973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012616644985973835, "signal/frontier_coverage_0/centered_abs_mean": 0.036959283798933026, "signal/frontier_coverage_0/group_std_mean": 0.057681336998939514, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_1/centered_abs_mean": 0.036959283798933026, "signal/frontier_coverage_1/group_std_mean": 0.057681336998939514, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_10/centered_abs_mean": 0.036959283798933026, "signal/frontier_coverage_10/group_std_mean": 0.057681336998939514, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_15/centered_abs_mean": 0.036959283798933026, "signal/frontier_coverage_15/group_std_mean": 0.057681336998939514, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_20/centered_abs_mean": 0.036959283798933026, "signal/frontier_coverage_20/group_std_mean": 0.057681336998939514, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_25/centered_abs_mean": 0.036959283798933026, "signal/frontier_coverage_25/group_std_mean": 0.057681336998939514, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_5/centered_abs_mean": 0.036959283798933026, "signal/frontier_coverage_5/group_std_mean": 0.057681336998939514, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005285177612677217, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005285177612677217, "step": 30 }, { "calibration/aurc": 0.2632535671771489, "calibration/batch_distribution_entropy": 0.6466923551545329, "calibration/buffer_distribution_entropy": 0.5421351864917888, "calibration/confidence_entropy": 0.44397516911608326, "calibration/coverage@0%": 0.015845758641871894, "calibration/coverage@1%": 0.015845758641871894, "calibration/coverage@10%": 0.06649707894187398, "calibration/coverage@15%": 0.1285361788638969, "calibration/coverage@20%": 0.16434714020955804, "calibration/coverage@25%": 0.38898227624249804, "calibration/coverage@30%": 0.8153820641936578, "calibration/coverage@5%": 0.015845758641871894, "calibration/ece": 0.14365905416536864, "calibration/mean_confidence": 0.8021350863697474, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015364583333333348, "completions/max_length": 4040.8, "completions/max_terminated_length": 4040.8, "completions/mean_length": 634.3866333007812, "completions/mean_terminated_length": 644.3625610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 179.6, "epoch": 0.08399895001312484, "grad_norm": 0.0005198422586545348, "learning_rate": 4.166666666666667e-06, "loss": -0.0098, "num_tokens": 62002823.0, "reward": 0.9421573758125306, "reward_std": 0.17538723051548005, "rewards/accuracy_reward": 0.6183159828186036, "rewards/brier_reward": 0.7280090808868408, "rewards/confidence_uniqueness_reward": 0.6922753095626831, "rewards/format_reward": 0.9828993201255798, "rewards/frontier_coverage_0": -0.004782191128470004, "rewards/frontier_coverage_1": -0.004782191128470004, "rewards/frontier_coverage_10": -0.004782191128470004, "rewards/frontier_coverage_15": -0.004782191128470004, "rewards/frontier_coverage_20": -0.004782191128470004, "rewards/frontier_coverage_25": -0.004782191128470004, "rewards/frontier_coverage_5": -0.004782191128470004, "signal/accuracy_reward/centered_abs_mean": 0.21829969584941863, "signal/accuracy_reward/group_std_mean": 0.27772454619407655, "signal/accuracy_reward/group_zero_std_frac": 0.25, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10914984792470932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10914984792470932, "signal/advantage_abs_mean": 0.13406893461942673, "signal/advantage_pre_scale_abs_mean": 0.13406893461942673, "signal/advantage_pre_scale_std": 0.20243431627750397, "signal/advantage_std": 0.20243431627750397, "signal/brier_reward/centered_abs_mean": 0.1590863436460495, "signal/brier_reward/group_std_mean": 0.20434601306915284, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01590863484889269, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01590863484889269, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12657882273197174, "signal/confidence_uniqueness_reward/group_std_mean": 0.15774886459112167, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01265788208693266, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01265788208693266, "signal/format_reward/centered_abs_mean": 0.02765299491584301, "signal/format_reward/group_std_mean": 0.04936157241463661, "signal/format_reward/group_zero_std_frac": 0.8083333492279052, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013826497457921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013826497457921505, "signal/frontier_coverage_0/centered_abs_mean": 0.046808502078056334, "signal/frontier_coverage_0/group_std_mean": 0.06622445359826087, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_1/centered_abs_mean": 0.046808502078056334, "signal/frontier_coverage_1/group_std_mean": 0.06622445359826087, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_10/centered_abs_mean": 0.046808502078056334, "signal/frontier_coverage_10/group_std_mean": 0.06622445359826087, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_15/centered_abs_mean": 0.046808502078056334, "signal/frontier_coverage_15/group_std_mean": 0.06622445359826087, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_20/centered_abs_mean": 0.046808502078056334, "signal/frontier_coverage_20/group_std_mean": 0.06622445359826087, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_25/centered_abs_mean": 0.046808502078056334, "signal/frontier_coverage_25/group_std_mean": 0.06622445359826087, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_5/centered_abs_mean": 0.046808502078056334, "signal/frontier_coverage_5/group_std_mean": 0.06622445359826087, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0006693615694530308, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0006693615694530308, "step": 35 }, { "calibration/aurc": 0.2676519777027838, "calibration/batch_distribution_entropy": 0.7054325857041888, "calibration/buffer_distribution_entropy": 0.6344972967811773, "calibration/confidence_entropy": 0.4799178907927473, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0599751997795536, "calibration/coverage@20%": 0.28643587561566747, "calibration/coverage@25%": 0.4707250139453013, "calibration/coverage@30%": 0.6129981332198531, "calibration/coverage@5%": 0.0, "calibration/ece": 0.12413084652207873, "calibration/mean_confidence": 0.7693312154212686, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013541666666666697, "completions/max_length": 3869.6, "completions/max_terminated_length": 3869.6, "completions/mean_length": 690.2263916015625, "completions/mean_terminated_length": 699.692431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.09599880001499982, "grad_norm": 0.0004507621633820236, "learning_rate": 4.761904761904762e-06, "loss": -0.01, "num_tokens": 73073751.0, "reward": 0.9667992949485779, "reward_std": 0.15924489200115205, "rewards/accuracy_reward": 0.6505208253860474, "rewards/brier_reward": 0.754754900932312, "rewards/confidence_uniqueness_reward": 0.7449337363243103, "rewards/format_reward": 0.9853298664093018, "rewards/frontier_coverage_0": -0.011506949504837393, "rewards/frontier_coverage_1": -0.011506949504837393, "rewards/frontier_coverage_10": -0.011506949504837393, "rewards/frontier_coverage_15": -0.011506949504837393, "rewards/frontier_coverage_20": -0.011012806138023735, "rewards/frontier_coverage_25": -0.008019896177574991, "rewards/frontier_coverage_5": -0.011506949504837393, "signal/accuracy_reward/centered_abs_mean": 0.18986544907093048, "signal/accuracy_reward/group_std_mean": 0.2536569803953171, "signal/accuracy_reward/group_zero_std_frac": 0.2777777761220932, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09493272453546524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09493272453546524, "signal/advantage_abs_mean": 0.1158403992652893, "signal/advantage_pre_scale_abs_mean": 0.1158403992652893, "signal/advantage_pre_scale_std": 0.18982842862606047, "signal/advantage_std": 0.18982842862606047, "signal/brier_reward/centered_abs_mean": 0.14530260264873504, "signal/brier_reward/group_std_mean": 0.18963007628917694, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014530261047184467, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014530261047184467, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08863844275474549, "signal/confidence_uniqueness_reward/group_std_mean": 0.1167033389210701, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008863845001906156, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008863845001906156, "signal/format_reward/centered_abs_mean": 0.02526584193110466, "signal/format_reward/group_std_mean": 0.04484616741538048, "signal/format_reward/group_zero_std_frac": 0.8277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01263292096555233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01263292096555233, "signal/frontier_coverage_0/centered_abs_mean": 0.06523959785699844, "signal/frontier_coverage_0/group_std_mean": 0.08977894186973571, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_1/centered_abs_mean": 0.06523959785699844, "signal/frontier_coverage_1/group_std_mean": 0.08977894186973571, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_10/centered_abs_mean": 0.06523959785699844, "signal/frontier_coverage_10/group_std_mean": 0.08977894186973571, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_15/centered_abs_mean": 0.06523959785699844, "signal/frontier_coverage_15/group_std_mean": 0.08977894186973571, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_20/centered_abs_mean": 0.06372052878141403, "signal/frontier_coverage_20/group_std_mean": 0.08785968273878098, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009112035506404937, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009112035506404937, "signal/frontier_coverage_25/centered_abs_mean": 0.05406465157866478, "signal/frontier_coverage_25/group_std_mean": 0.07568821161985398, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007731245132163167, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007731245132163167, "signal/frontier_coverage_5/centered_abs_mean": 0.06523959785699844, "signal/frontier_coverage_5/group_std_mean": 0.08977894186973571, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009329262189567089, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009329262189567089, "step": 40 }, { "calibration/aurc": 0.23071652949504246, "calibration/batch_distribution_entropy": 0.7055601707639283, "calibration/buffer_distribution_entropy": 0.6936541419751034, "calibration/confidence_entropy": 0.4729729373365611, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0061111111111111106, "calibration/coverage@15%": 0.06558502555601603, "calibration/coverage@20%": 0.2877821522309711, "calibration/coverage@25%": 0.69798687354932, "calibration/coverage@30%": 0.9863517060367453, "calibration/coverage@5%": 0.0, "calibration/ece": 0.10290537054204563, "calibration/mean_confidence": 0.7594771358212933, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014409722222222188, "completions/max_length": 3817.8, "completions/max_terminated_length": 3817.8, "completions/mean_length": 731.4316040039063, "completions/mean_terminated_length": 742.1711059570313, "completions/min_length": 0.0, "completions/min_terminated_length": 232.6, "epoch": 0.1079986500168748, "grad_norm": 0.00046814393135719, "learning_rate": 4.909638554216868e-06, "loss": -0.0112, "num_tokens": 84635107.0, "reward": 0.9677613854408265, "reward_std": 0.15975097417831421, "rewards/accuracy_reward": 0.6506076574325561, "rewards/brier_reward": 0.7581860542297363, "rewards/confidence_uniqueness_reward": 0.7537668347358704, "rewards/format_reward": 0.984375, "rewards/frontier_coverage_0": -0.011279890162404627, "rewards/frontier_coverage_1": -0.011279890162404627, "rewards/frontier_coverage_10": -0.011279890162404627, "rewards/frontier_coverage_15": -0.011279890162404627, "rewards/frontier_coverage_20": -0.007542286452371627, "rewards/frontier_coverage_25": -0.0007611054461449385, "rewards/frontier_coverage_5": -0.011279890162404627, "signal/accuracy_reward/centered_abs_mean": 0.19601236879825593, "signal/accuracy_reward/group_std_mean": 0.25668974220752716, "signal/accuracy_reward/group_zero_std_frac": 0.27777778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09800618439912796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09800618439912796, "signal/advantage_abs_mean": 0.11878292560577393, "signal/advantage_pre_scale_abs_mean": 0.11878292560577393, "signal/advantage_pre_scale_std": 0.1898341953754425, "signal/advantage_std": 0.1898341953754425, "signal/brier_reward/centered_abs_mean": 0.1457345962524414, "signal/brier_reward/group_std_mean": 0.19106005132198334, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014573459327220917, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014573459327220917, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10429143160581589, "signal/confidence_uniqueness_reward/group_std_mean": 0.13065478056669236, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010429143160581588, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010429143160581588, "signal/format_reward/centered_abs_mean": 0.02532552070915699, "signal/format_reward/group_std_mean": 0.04548909664154053, "signal/format_reward/group_zero_std_frac": 0.819444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012662760354578495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012662760354578495, "signal/frontier_coverage_0/centered_abs_mean": 0.07666564732789993, "signal/frontier_coverage_0/group_std_mean": 0.10538152903318405, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_1/centered_abs_mean": 0.07666564732789993, "signal/frontier_coverage_1/group_std_mean": 0.10538152903318405, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_10/centered_abs_mean": 0.07666564732789993, "signal/frontier_coverage_10/group_std_mean": 0.10538152903318405, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_15/centered_abs_mean": 0.07666564732789993, "signal/frontier_coverage_15/group_std_mean": 0.10538152903318405, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_20/centered_abs_mean": 0.0640810675919056, "signal/frontier_coverage_20/group_std_mean": 0.08941369652748107, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000916359294205904, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000916359294205904, "signal/frontier_coverage_25/centered_abs_mean": 0.04283785969018936, "signal/frontier_coverage_25/group_std_mean": 0.062041699141263965, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006125814048573375, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006125814048573375, "signal/frontier_coverage_5/centered_abs_mean": 0.07666564732789993, "signal/frontier_coverage_5/group_std_mean": 0.10538152903318405, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001096318766940385, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001096318766940385, "step": 45 }, { "calibration/aurc": 0.39586401271300875, "calibration/batch_distribution_entropy": 0.7615897964351726, "calibration/buffer_distribution_entropy": 0.7270643773427474, "calibration/confidence_entropy": 0.5045923336911955, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.012635389036251105, "calibration/coverage@15%": 0.013696396993810787, "calibration/coverage@20%": 0.023667813000846843, "calibration/coverage@25%": 0.03044551748633497, "calibration/coverage@30%": 0.15728762274949287, "calibration/coverage@5%": 0.0, "calibration/ece": 0.21160105286348876, "calibration/mean_confidence": 0.7227722715219795, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00737847222222221, "completions/max_length": 3473.0, "completions/max_terminated_length": 3473.0, "completions/mean_length": 759.92587890625, "completions/mean_terminated_length": 765.5557373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.11999850001874976, "grad_norm": 0.0005435345810838044, "learning_rate": 4.759036144578314e-06, "loss": -0.0043, "num_tokens": 96487053.0, "reward": 0.9660153031349182, "reward_std": 0.1462629795074463, "rewards/accuracy_reward": 0.6291666626930237, "rewards/brier_reward": 0.7552559852600098, "rewards/confidence_uniqueness_reward": 0.7993631482124328, "rewards/format_reward": 0.9925347208976746, "rewards/frontier_coverage_0": -0.00474322558275162, "rewards/frontier_coverage_1": -0.00474322558275162, "rewards/frontier_coverage_10": -0.00474322558275162, "rewards/frontier_coverage_15": -0.00474322558275162, "rewards/frontier_coverage_20": -0.0008788239560090005, "rewards/frontier_coverage_25": 0.0038009291049093006, "rewards/frontier_coverage_5": -0.00474322558275162, "signal/accuracy_reward/centered_abs_mean": 0.18924696147441863, "signal/accuracy_reward/group_std_mean": 0.2494819164276123, "signal/accuracy_reward/group_zero_std_frac": 0.29166667759418485, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09462348073720932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09462348073720932, "signal/advantage_abs_mean": 0.10812054872512818, "signal/advantage_pre_scale_abs_mean": 0.10812054872512818, "signal/advantage_pre_scale_std": 0.17321833372116088, "signal/advantage_std": 0.17321833372116088, "signal/brier_reward/centered_abs_mean": 0.13809363842010497, "signal/brier_reward/group_std_mean": 0.18017106652259826, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013809364847838878, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013809364847838878, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.079685477912426, "signal/confidence_uniqueness_reward/group_std_mean": 0.10326177328824997, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00796854794025421, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00796854794025421, "signal/format_reward/centered_abs_mean": 0.013270399440079928, "signal/format_reward/group_std_mean": 0.02645639069378376, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006635199720039964, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006635199720039964, "signal/frontier_coverage_0/centered_abs_mean": 0.08481028228998184, "signal/frontier_coverage_0/group_std_mean": 0.11866706758737564, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_1/centered_abs_mean": 0.08481028228998184, "signal/frontier_coverage_1/group_std_mean": 0.11866706758737564, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_10/centered_abs_mean": 0.08481028228998184, "signal/frontier_coverage_10/group_std_mean": 0.11866706758737564, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_15/centered_abs_mean": 0.08481028228998184, "signal/frontier_coverage_15/group_std_mean": 0.11866706758737564, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_20/centered_abs_mean": 0.07179783061146736, "signal/frontier_coverage_20/group_std_mean": 0.10189384371042251, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001026709016878158, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001026709016878158, "signal/frontier_coverage_25/centered_abs_mean": 0.046491443365812304, "signal/frontier_coverage_25/group_std_mean": 0.06868501603603364, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006648276466876268, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006648276466876268, "signal/frontier_coverage_5/centered_abs_mean": 0.08481028228998184, "signal/frontier_coverage_5/group_std_mean": 0.11866706758737564, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012127869995310903, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012127869995310903, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.2776295888815277, "eval_calibration/batch_distribution_entropy": 0.7319698598432759, "eval_calibration/buffer_distribution_entropy": 0.7449058358345398, "eval_calibration/confidence_entropy": 0.5090239030799538, "eval_calibration/coverage@0%": 0.08854166666666667, "eval_calibration/coverage@1%": 0.08854166666666667, "eval_calibration/coverage@10%": 0.171875, "eval_calibration/coverage@15%": 0.2604166666666667, "eval_calibration/coverage@20%": 0.2708333333333333, "eval_calibration/coverage@25%": 0.3541666666666667, "eval_calibration/coverage@30%": 0.6145833333333334, "eval_calibration/coverage@5%": 0.08854166666666667, "eval_calibration/ece": 0.165546875, "eval_calibration/mean_confidence": 0.7360677083333335, "eval_completions/clipped_ratio": 0.006944444444444438, "eval_completions/max_length": 2543.1666666666665, "eval_completions/max_terminated_length": 2543.1666666666665, "eval_completions/mean_length": 727.5720621744791, "eval_completions/mean_terminated_length": 732.6880798339844, "eval_completions/min_length": 45.833333333333336, "eval_completions/min_terminated_length": 270.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 96487053.0, "eval_reward": 0.968879888455073, "eval_reward_std": 0.2623755360643069, "eval_rewards/accuracy_reward": 0.6397569378217062, "eval_rewards/brier_reward": 0.7657919128735861, "eval_rewards/confidence_uniqueness_reward": 0.7643194397290548, "eval_rewards/format_reward": 0.9921875, "eval_rewards/frontier_coverage_0": -0.002180379558315811, "eval_rewards/frontier_coverage_1": -0.002180379558315811, "eval_rewards/frontier_coverage_10": -0.002180379558315811, "eval_rewards/frontier_coverage_15": -0.001989841514538663, "eval_rewards/frontier_coverage_20": -0.0009192682919092476, "eval_rewards/frontier_coverage_25": 0.00439577810660315, "eval_rewards/frontier_coverage_5": -0.002180379558315811, "eval_runtime": 204.3039, "eval_samples_per_second": 4.895, "eval_signal/accuracy_reward/centered_abs_mean": 0.44677734375, "eval_signal/accuracy_reward/group_std_mean": 0.47931412359078723, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.223388671875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.223388671875, "eval_signal/advantage_abs_mean": 0.23765324552853903, "eval_signal/advantage_pre_scale_abs_mean": 0.23765324552853903, "eval_signal/advantage_pre_scale_std": 0.2600039492050807, "eval_signal/advantage_std": 0.2600039492050807, "eval_signal/brier_reward/centered_abs_mean": 0.21157046655813852, "eval_signal/brier_reward/group_std_mean": 0.26279614369074505, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021157047400871914, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021157047400871914, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10844459633032481, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13759969919919968, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010844459757208824, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010844459757208824, "eval_signal/format_reward/centered_abs_mean": 0.015136718284338713, "eval_signal/format_reward/group_std_mean": 0.044194173999130726, "eval_signal/format_reward/group_zero_std_frac": 0.7500000298023224, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359142169356, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359142169356, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.10852197060982387, "eval_signal/frontier_coverage_0/group_std_mean": 0.16503738115231195, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015518641448579729, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015518641448579729, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.10852197060982387, "eval_signal/frontier_coverage_1/group_std_mean": 0.16503738115231195, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015518641448579729, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015518641448579729, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.10852197060982387, "eval_signal/frontier_coverage_10/group_std_mean": 0.16503738115231195, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015518641448579729, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015518641448579729, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.10724649329980214, "eval_signal/frontier_coverage_15/group_std_mean": 0.16348060965538025, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015336248131158452, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015336248131158452, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.09694457550843556, "eval_signal/frontier_coverage_20/group_std_mean": 0.14916668087244034, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001386307393355916, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001386307393355916, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.06621957384049892, "eval_signal/frontier_coverage_25/group_std_mean": 0.10746484374006589, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009469399325704823, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009469399325704823, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.10852197060982387, "eval_signal/frontier_coverage_5/group_std_mean": 0.16503738115231195, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015518641448579729, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015518641448579729, "eval_steps_per_second": 0.029, "step": 50 }, { "calibration/aurc": 0.31649640361405623, "calibration/batch_distribution_entropy": 0.8044629368041323, "calibration/buffer_distribution_entropy": 0.7632590824091535, "calibration/confidence_entropy": 0.527233352329629, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.021354166666666667, "calibration/coverage@15%": 0.029228182414698163, "calibration/coverage@20%": 0.10320702099737533, "calibration/coverage@25%": 0.3614583333333333, "calibration/coverage@30%": 0.40374331550802134, "calibration/coverage@5%": 0.0, "calibration/ece": 0.13912997885501577, "calibration/mean_confidence": 0.704141535873572, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006250000000000022, "completions/max_length": 3575.8, "completions/max_terminated_length": 3575.8, "completions/mean_length": 764.2988037109375, "completions/mean_terminated_length": 769.2012573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 205.4, "epoch": 0.13199835002062474, "grad_norm": 0.00045138923451304436, "learning_rate": 4.60843373493976e-06, "loss": -0.0045, "num_tokens": 108372351.0, "reward": 0.9776891589164733, "reward_std": 0.14157166481018066, "rewards/accuracy_reward": 0.6438368082046508, "rewards/brier_reward": 0.7670759439468384, "rewards/confidence_uniqueness_reward": 0.8284211039543152, "rewards/format_reward": 0.9937500119209289, "rewards/frontier_coverage_0": -0.008810842200182379, "rewards/frontier_coverage_1": -0.008810842200182379, "rewards/frontier_coverage_10": -0.008810842200182379, "rewards/frontier_coverage_15": -0.007830455573275686, "rewards/frontier_coverage_20": -0.004610662302002311, "rewards/frontier_coverage_25": 0.0019515341526130214, "rewards/frontier_coverage_5": -0.008810842200182379, "signal/accuracy_reward/centered_abs_mean": 0.18646375834941864, "signal/accuracy_reward/group_std_mean": 0.24433983564376832, "signal/accuracy_reward/group_zero_std_frac": 0.31666667461395265, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09323187917470932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09323187917470932, "signal/advantage_abs_mean": 0.10541787147521972, "signal/advantage_pre_scale_abs_mean": 0.10541787147521972, "signal/advantage_pre_scale_std": 0.16827026903629302, "signal/advantage_std": 0.16827026903629302, "signal/brier_reward/centered_abs_mean": 0.13579574525356292, "signal/brier_reward/group_std_mean": 0.1776826024055481, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013579574786126614, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013579574786126614, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07823738157749176, "signal/confidence_uniqueness_reward/group_std_mean": 0.1013486623764038, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007823738548904658, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007823738548904658, "signal/format_reward/centered_abs_mean": 0.01110026049427688, "signal/format_reward/group_std_mean": 0.0223752673715353, "signal/format_reward/group_zero_std_frac": 0.9027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00555013024713844, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00555013024713844, "signal/frontier_coverage_0/centered_abs_mean": 0.09882133305072785, "signal/frontier_coverage_0/group_std_mean": 0.13713002651929856, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014131450327113271, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014131450327113271, "signal/frontier_coverage_1/centered_abs_mean": 0.09882133305072785, "signal/frontier_coverage_1/group_std_mean": 0.13713002651929856, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014131450327113271, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014131450327113271, "signal/frontier_coverage_10/centered_abs_mean": 0.09882133305072785, "signal/frontier_coverage_10/group_std_mean": 0.13713002651929856, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014131450327113271, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014131450327113271, "signal/frontier_coverage_15/centered_abs_mean": 0.09499142318964005, "signal/frontier_coverage_15/group_std_mean": 0.13223906755447387, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013583773747086526, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013583773747086526, "signal/frontier_coverage_20/centered_abs_mean": 0.08681065887212754, "signal/frontier_coverage_20/group_std_mean": 0.12124353647232056, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012413924559950829, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012413924559950829, "signal/frontier_coverage_25/centered_abs_mean": 0.06926739811897278, "signal/frontier_coverage_25/group_std_mean": 0.09777135252952576, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009905237704515458, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009905237704515458, "signal/frontier_coverage_5/centered_abs_mean": 0.09882133305072785, "signal/frontier_coverage_5/group_std_mean": 0.13713002651929856, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014131450327113271, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014131450327113271, "step": 55 }, { "calibration/aurc": 0.3382260111628641, "calibration/batch_distribution_entropy": 0.8498678716551673, "calibration/buffer_distribution_entropy": 0.8051509048121346, "calibration/confidence_entropy": 0.5316433536264733, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.042708333333333334, "calibration/coverage@15%": 0.2808253627968338, "calibration/coverage@20%": 0.3194285460613261, "calibration/coverage@25%": 0.36942854606132614, "calibration/coverage@30%": 0.46338289628488666, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1688721630430574, "calibration/mean_confidence": 0.6809634581476074, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0057291666666666515, "completions/max_length": 3641.0, "completions/max_terminated_length": 3641.0, "completions/mean_length": 771.4474853515625, "completions/mean_terminated_length": 775.8955322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 225.6, "epoch": 0.14399820002249972, "grad_norm": 0.0004424219368956983, "learning_rate": 4.457831325301205e-06, "loss": -0.0032, "num_tokens": 120356002.0, "reward": 0.9673421621322632, "reward_std": 0.14665516316890717, "rewards/accuracy_reward": 0.6087673544883728, "rewards/brier_reward": 0.7633313059806823, "rewards/confidence_uniqueness_reward": 0.8849505186080933, "rewards/format_reward": 0.9940104007720947, "rewards/frontier_coverage_0": 0.010127071291208267, "rewards/frontier_coverage_1": 0.010127071291208267, "rewards/frontier_coverage_10": 0.010127071291208267, "rewards/frontier_coverage_15": 0.010443565156310796, "rewards/frontier_coverage_20": 0.012819062476046384, "rewards/frontier_coverage_25": 0.0149055490270257, "rewards/frontier_coverage_5": 0.010127071291208267, "signal/accuracy_reward/centered_abs_mean": 0.19885525107383728, "signal/accuracy_reward/group_std_mean": 0.2586120396852493, "signal/accuracy_reward/group_zero_std_frac": 0.28333333134651184, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09942762553691864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09942762553691864, "signal/advantage_abs_mean": 0.10986414402723313, "signal/advantage_pre_scale_abs_mean": 0.10986414402723313, "signal/advantage_pre_scale_std": 0.17161572575569153, "signal/advantage_std": 0.17161572575569153, "signal/brier_reward/centered_abs_mean": 0.1456875115633011, "signal/brier_reward/group_std_mean": 0.18925343751907348, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01456875205039978, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01456875205039978, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07028612047433853, "signal/confidence_uniqueness_reward/group_std_mean": 0.0925293281674385, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007028611935675144, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007028611935675144, "signal/format_reward/centered_abs_mean": 0.01108398474752903, "signal/format_reward/group_std_mean": 0.025465189665555953, "signal/format_reward/group_zero_std_frac": 0.8777777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005541992373764515, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005541992373764515, "signal/frontier_coverage_0/centered_abs_mean": 0.11798207312822342, "signal/frontier_coverage_0/group_std_mean": 0.1616940289735794, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016871436731889845, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016871436731889845, "signal/frontier_coverage_1/centered_abs_mean": 0.11798207312822342, "signal/frontier_coverage_1/group_std_mean": 0.1616940289735794, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016871436731889845, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016871436731889845, "signal/frontier_coverage_10/centered_abs_mean": 0.11798207312822342, "signal/frontier_coverage_10/group_std_mean": 0.1616940289735794, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016871436731889845, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016871436731889845, "signal/frontier_coverage_15/centered_abs_mean": 0.11572056114673615, "signal/frontier_coverage_15/group_std_mean": 0.15887772738933564, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016548039624467493, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016548039624467493, "signal/frontier_coverage_20/centered_abs_mean": 0.10319755375385284, "signal/frontier_coverage_20/group_std_mean": 0.14280655086040497, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014757250202819705, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014757250202819705, "signal/frontier_coverage_25/centered_abs_mean": 0.08172965943813323, "signal/frontier_coverage_25/group_std_mean": 0.11435115933418274, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011687340680509805, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011687340680509805, "signal/frontier_coverage_5/centered_abs_mean": 0.11798207312822342, "signal/frontier_coverage_5/group_std_mean": 0.1616940289735794, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016871436731889845, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016871436731889845, "step": 60 }, { "calibration/aurc": 0.24587685737231602, "calibration/batch_distribution_entropy": 0.8593260113425367, "calibration/buffer_distribution_entropy": 0.8419197575160856, "calibration/confidence_entropy": 0.5353930508126851, "calibration/coverage@0%": 0.017225576588337684, "calibration/coverage@1%": 0.017225576588337684, "calibration/coverage@10%": 0.175674499564839, "calibration/coverage@15%": 0.4178592798085291, "calibration/coverage@20%": 0.5347761640557007, "calibration/coverage@25%": 0.64177545691906, "calibration/coverage@30%": 0.7441253263707572, "calibration/coverage@5%": 0.017747769799825935, "calibration/ece": 0.14085569002805973, "calibration/mean_confidence": 0.6692040214761524, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002777777777777768, "completions/max_length": 3182.6, "completions/max_terminated_length": 3182.6, "completions/mean_length": 761.7659790039063, "completions/mean_terminated_length": 763.8692138671875, "completions/min_length": 0.0, "completions/min_terminated_length": 215.8, "epoch": 0.1559980500243747, "grad_norm": 0.0004698596312664449, "learning_rate": 4.307228915662651e-06, "loss": 0.0001, "num_tokens": 132225594.0, "reward": 0.995530652999878, "reward_std": 0.13234637379646302, "rewards/accuracy_reward": 0.64921875, "rewards/brier_reward": 0.785097849369049, "rewards/confidence_uniqueness_reward": 0.9373332023620605, "rewards/format_reward": 0.9971354007720947, "rewards/frontier_coverage_0": -0.002034256886690855, "rewards/frontier_coverage_1": -0.002034256886690855, "rewards/frontier_coverage_10": -0.002034256886690855, "rewards/frontier_coverage_15": -0.0016045193187892437, "rewards/frontier_coverage_20": 0.0038384980522096156, "rewards/frontier_coverage_25": 0.013624860998243093, "rewards/frontier_coverage_5": -0.002034256886690855, "signal/accuracy_reward/centered_abs_mean": 0.18282877504825593, "signal/accuracy_reward/group_std_mean": 0.24428035020828248, "signal/accuracy_reward/group_zero_std_frac": 0.29722222685813904, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09141438752412796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09141438752412796, "signal/advantage_abs_mean": 0.09773297160863877, "signal/advantage_pre_scale_abs_mean": 0.09773297160863877, "signal/advantage_pre_scale_std": 0.1567333608865738, "signal/advantage_std": 0.1567333608865738, "signal/brier_reward/centered_abs_mean": 0.13010090589523315, "signal/brier_reward/group_std_mean": 0.1674443781375885, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01301009114831686, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01301009114831686, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03651793897151947, "signal/confidence_uniqueness_reward/group_std_mean": 0.051064802706241606, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003651794046163559, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003651794046163559, "signal/format_reward/centered_abs_mean": 0.005430772574618459, "signal/format_reward/group_std_mean": 0.013679004088044167, "signal/format_reward/group_zero_std_frac": 0.9305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027153862873092295, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0027153862873092295, "signal/frontier_coverage_0/centered_abs_mean": 0.12229467630386352, "signal/frontier_coverage_0/group_std_mean": 0.16521598398685455, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017488139681518077, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017488139681518077, "signal/frontier_coverage_1/centered_abs_mean": 0.12229467630386352, "signal/frontier_coverage_1/group_std_mean": 0.16521598398685455, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017488139681518077, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017488139681518077, "signal/frontier_coverage_10/centered_abs_mean": 0.12229467630386352, "signal/frontier_coverage_10/group_std_mean": 0.16521598398685455, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017488139681518077, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017488139681518077, "signal/frontier_coverage_15/centered_abs_mean": 0.12053841352462769, "signal/frontier_coverage_15/group_std_mean": 0.16303691267967224, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017236994579434394, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017236994579434394, "signal/frontier_coverage_20/centered_abs_mean": 0.09820731431245804, "signal/frontier_coverage_20/group_std_mean": 0.13474227339029313, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014043646398931742, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014043646398931742, "signal/frontier_coverage_25/centered_abs_mean": 0.07332679852843285, "signal/frontier_coverage_25/group_std_mean": 0.10180476605892182, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010485732345841825, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010485732345841825, "signal/frontier_coverage_5/centered_abs_mean": 0.12229467630386352, "signal/frontier_coverage_5/group_std_mean": 0.16521598398685455, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017488139681518077, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017488139681518077, "step": 65 }, { "calibration/aurc": 0.26958251359481367, "calibration/batch_distribution_entropy": 0.8663688799379698, "calibration/buffer_distribution_entropy": 0.8733795984294593, "calibration/confidence_entropy": 0.577260486680282, "calibration/coverage@0%": 0.02308205424394914, "calibration/coverage@1%": 0.02308205424394914, "calibration/coverage@10%": 0.12958876708285477, "calibration/coverage@15%": 0.14900721673999842, "calibration/coverage@20%": 0.2683685738270198, "calibration/coverage@25%": 0.3697268444811656, "calibration/coverage@30%": 0.6402569338436326, "calibration/coverage@5%": 0.0755754925641591, "calibration/ece": 0.09930710020716957, "calibration/mean_confidence": 0.6231611950933489, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005902777777777768, "completions/max_length": 3517.4, "completions/max_terminated_length": 3517.4, "completions/mean_length": 762.48056640625, "completions/mean_terminated_length": 767.0295288085938, "completions/min_length": 0.0, "completions/min_terminated_length": 196.6, "epoch": 0.16799790002624967, "grad_norm": 0.00045749920536763966, "learning_rate": 4.156626506024097e-06, "loss": -0.0059, "num_tokens": 144087514.0, "reward": 0.9868767857551575, "reward_std": 0.1281371980905533, "rewards/accuracy_reward": 0.637586796283722, "rewards/brier_reward": 0.7736868143081665, "rewards/confidence_uniqueness_reward": 0.9424182176589966, "rewards/format_reward": 0.9940972208976746, "rewards/frontier_coverage_0": -0.008795747673138976, "rewards/frontier_coverage_1": -0.008795747673138976, "rewards/frontier_coverage_10": -0.008733327453956007, "rewards/frontier_coverage_15": -0.008401900064200163, "rewards/frontier_coverage_20": -0.0042295768857002255, "rewards/frontier_coverage_25": 0.007490093156229704, "rewards/frontier_coverage_5": -0.008795747673138976, "signal/accuracy_reward/centered_abs_mean": 0.17555881142616273, "signal/accuracy_reward/group_std_mean": 0.2333855837583542, "signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08777940571308136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08777940571308136, "signal/advantage_abs_mean": 0.09379418194293976, "signal/advantage_pre_scale_abs_mean": 0.09379418194293976, "signal/advantage_pre_scale_std": 0.1555002361536026, "signal/advantage_std": 0.1555002361536026, "signal/brier_reward/centered_abs_mean": 0.12457352876663208, "signal/brier_reward/group_std_mean": 0.160868501663208, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012457353435456753, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012457353435456753, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.032642674446105954, "signal/confidence_uniqueness_reward/group_std_mean": 0.04912559166550636, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032642676495015623, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032642676495015623, "signal/format_reward/centered_abs_mean": 0.010753038246184588, "signal/format_reward/group_std_mean": 0.02312941402196884, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005376519123092294, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005376519123092294, "signal/frontier_coverage_0/centered_abs_mean": 0.13515576124191284, "signal/frontier_coverage_0/group_std_mean": 0.17945427298545838, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019327274756506085, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019327274756506085, "signal/frontier_coverage_1/centered_abs_mean": 0.13515576124191284, "signal/frontier_coverage_1/group_std_mean": 0.17945427298545838, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019327274756506085, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019327274756506085, "signal/frontier_coverage_10/centered_abs_mean": 0.134914430975914, "signal/frontier_coverage_10/group_std_mean": 0.17911535501480103, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019292764598503708, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019292764598503708, "signal/frontier_coverage_15/centered_abs_mean": 0.131193308532238, "signal/frontier_coverage_15/group_std_mean": 0.1743150144815445, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018760643433779478, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018760643433779478, "signal/frontier_coverage_20/centered_abs_mean": 0.10956997573375701, "signal/frontier_coverage_20/group_std_mean": 0.14680338203907012, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001566850603558123, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001566850603558123, "signal/frontier_coverage_25/centered_abs_mean": 0.06231881156563759, "signal/frontier_coverage_25/group_std_mean": 0.08558403998613358, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000891158974263817, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000891158974263817, "signal/frontier_coverage_5/centered_abs_mean": 0.13515576124191284, "signal/frontier_coverage_5/group_std_mean": 0.17945427298545838, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019327274756506085, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019327274756506085, "step": 70 }, { "calibration/aurc": 0.2563588085190992, "calibration/batch_distribution_entropy": 0.8382992480473315, "calibration/buffer_distribution_entropy": 0.8805381439631234, "calibration/confidence_entropy": 0.5695789505685956, "calibration/coverage@0%": 0.02257520030390938, "calibration/coverage@1%": 0.02257520030390938, "calibration/coverage@10%": 0.1989478432794585, "calibration/coverage@15%": 0.27763706140350874, "calibration/coverage@20%": 0.41623903508771926, "calibration/coverage@25%": 0.586359649122807, "calibration/coverage@30%": 0.6760910087719298, "calibration/coverage@5%": 0.027838358198646225, "calibration/ece": 0.10921236148819072, "calibration/mean_confidence": 0.6479703220803527, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002951388888888884, "completions/max_length": 3187.4, "completions/max_terminated_length": 3187.4, "completions/mean_length": 755.7357788085938, "completions/mean_terminated_length": 757.99462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 218.8, "epoch": 0.17999775002812465, "grad_norm": 0.0004290560900699347, "learning_rate": 4.006024096385543e-06, "loss": 0.0003, "num_tokens": 155858486.0, "reward": 1.017405092716217, "reward_std": 0.11944967806339264, "rewards/accuracy_reward": 0.6934895753860474, "rewards/brier_reward": 0.7949100136756897, "rewards/confidence_uniqueness_reward": 0.9398416519165039, "rewards/format_reward": 0.9970486164093018, "rewards/frontier_coverage_0": -0.023628878220915795, "rewards/frontier_coverage_1": -0.023628878220915795, "rewards/frontier_coverage_10": -0.021788668585941195, "rewards/frontier_coverage_15": -0.014927842747420072, "rewards/frontier_coverage_20": -0.003723863745108247, "rewards/frontier_coverage_25": 0.017198705207556488, "rewards/frontier_coverage_5": -0.023147269897162915, "signal/accuracy_reward/centered_abs_mean": 0.16501193642616271, "signal/accuracy_reward/group_std_mean": 0.21951915323734283, "signal/accuracy_reward/group_zero_std_frac": 0.3694444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08250596821308136, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08250596821308136, "signal/advantage_abs_mean": 0.08793365359306335, "signal/advantage_pre_scale_abs_mean": 0.08793365359306335, "signal/advantage_pre_scale_std": 0.14816934764385223, "signal/advantage_std": 0.14816934764385223, "signal/brier_reward/centered_abs_mean": 0.11173765361309052, "signal/brier_reward/group_std_mean": 0.14379720985889435, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011173765547573567, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011173765547573567, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030797071009874343, "signal/confidence_uniqueness_reward/group_std_mean": 0.0448210634291172, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030797069426625966, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030797069426625966, "signal/format_reward/centered_abs_mean": 0.005631510401144624, "signal/format_reward/group_std_mean": 0.01455035675317049, "signal/format_reward/group_zero_std_frac": 0.925, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002815755200572312, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002815755200572312, "signal/frontier_coverage_0/centered_abs_mean": 0.11744404733180999, "signal/frontier_coverage_0/group_std_mean": 0.15851396322250366, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001679449831135571, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001679449831135571, "signal/frontier_coverage_1/centered_abs_mean": 0.11744404733180999, "signal/frontier_coverage_1/group_std_mean": 0.15851396322250366, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001679449831135571, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001679449831135571, "signal/frontier_coverage_10/centered_abs_mean": 0.11422204971313477, "signal/frontier_coverage_10/group_std_mean": 0.15434344708919526, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016333752777427436, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016333752777427436, "signal/frontier_coverage_15/centered_abs_mean": 0.10063839107751846, "signal/frontier_coverage_15/group_std_mean": 0.13689128160476685, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014391290256753563, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014391290256753563, "signal/frontier_coverage_20/centered_abs_mean": 0.07208155021071434, "signal/frontier_coverage_20/group_std_mean": 0.0996133729815483, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001030766183976084, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001030766183976084, "signal/frontier_coverage_25/centered_abs_mean": 0.043751812726259234, "signal/frontier_coverage_25/group_std_mean": 0.06023159921169281, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006256509223021567, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006256509223021567, "signal/frontier_coverage_5/centered_abs_mean": 0.11698432117700577, "signal/frontier_coverage_5/group_std_mean": 0.15789782702922822, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016728756949305535, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016728756949305535, "step": 75 }, { "calibration/aurc": 0.21554772857493187, "calibration/batch_distribution_entropy": 0.7793047369317647, "calibration/buffer_distribution_entropy": 0.859575274807294, "calibration/confidence_entropy": 0.5226812675460125, "calibration/coverage@0%": 0.012215909090909092, "calibration/coverage@1%": 0.012215909090909092, "calibration/coverage@10%": 0.043605169340463455, "calibration/coverage@15%": 0.41333556149732625, "calibration/coverage@20%": 0.47428141711229943, "calibration/coverage@25%": 0.7336229946524064, "calibration/coverage@30%": 0.7927083333333333, "calibration/coverage@5%": 0.01756350267379679, "calibration/ece": 0.11169882524136499, "calibration/mean_confidence": 0.7130084327986905, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005642361111111116, "completions/max_length": 3469.4, "completions/max_terminated_length": 3469.4, "completions/mean_length": 808.5981689453125, "completions/mean_terminated_length": 813.23388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 204.4, "epoch": 0.19199760002999963, "grad_norm": 0.0005567002226598561, "learning_rate": 3.855421686746989e-06, "loss": -0.0042, "num_tokens": 168226817.0, "reward": 0.9941539883613586, "reward_std": 0.13035476952791214, "rewards/accuracy_reward": 0.6485243082046509, "rewards/brier_reward": 0.7861994743347168, "rewards/confidence_uniqueness_reward": 0.9314009189605713, "rewards/format_reward": 0.9942708492279053, "rewards/frontier_coverage_0": 0.005442132381722331, "rewards/frontier_coverage_1": 0.005442132381722331, "rewards/frontier_coverage_10": 0.006071169814094901, "rewards/frontier_coverage_15": 0.007214262872003019, "rewards/frontier_coverage_20": 0.011351470567751676, "rewards/frontier_coverage_25": 0.02855553664267063, "rewards/frontier_coverage_5": 0.005597225157544017, "signal/accuracy_reward/centered_abs_mean": 0.17549370527267455, "signal/accuracy_reward/group_std_mean": 0.23448271155357361, "signal/accuracy_reward/group_zero_std_frac": 0.32222222089767455, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08774685263633727, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08774685263633727, "signal/advantage_abs_mean": 0.09637740403413772, "signal/advantage_pre_scale_abs_mean": 0.09637740403413772, "signal/advantage_pre_scale_std": 0.16058520078659058, "signal/advantage_std": 0.16058520078659058, "signal/brier_reward/centered_abs_mean": 0.12150197178125381, "signal/brier_reward/group_std_mean": 0.15757565200328827, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01215019728988409, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01215019728988409, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.038753630965948103, "signal/confidence_uniqueness_reward/group_std_mean": 0.05312336310744285, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003875363012775779, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003875363012775779, "signal/format_reward/centered_abs_mean": 0.00966796875, "signal/format_reward/group_std_mean": 0.018474388308823107, "signal/format_reward/group_zero_std_frac": 0.9222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004833984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004833984375, "signal/frontier_coverage_0/centered_abs_mean": 0.10300857871770859, "signal/frontier_coverage_0/group_std_mean": 0.14024612605571746, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014730226481333374, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014730226481333374, "signal/frontier_coverage_1/centered_abs_mean": 0.10300857871770859, "signal/frontier_coverage_1/group_std_mean": 0.14024612605571746, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014730226481333374, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014730226481333374, "signal/frontier_coverage_10/centered_abs_mean": 0.0971254363656044, "signal/frontier_coverage_10/group_std_mean": 0.132836189866066, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001388893718831241, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001388893718831241, "signal/frontier_coverage_15/centered_abs_mean": 0.08325443416833878, "signal/frontier_coverage_15/group_std_mean": 0.11486416459083557, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011905384133569896, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011905384133569896, "signal/frontier_coverage_20/centered_abs_mean": 0.056342567503452304, "signal/frontier_coverage_20/group_std_mean": 0.07883516997098923, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008056987193413079, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008056987193413079, "signal/frontier_coverage_25/centered_abs_mean": 0.04240647032856941, "signal/frontier_coverage_25/group_std_mean": 0.05655211955308914, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006064124754630029, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006064124754630029, "signal/frontier_coverage_5/centered_abs_mean": 0.10220045298337936, "signal/frontier_coverage_5/group_std_mean": 0.1392603486776352, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014614664250984788, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014614664250984788, "step": 80 }, { "calibration/aurc": 0.22017950153274862, "calibration/batch_distribution_entropy": 0.8714044348934851, "calibration/buffer_distribution_entropy": 0.8522211124256589, "calibration/confidence_entropy": 0.5380838558752095, "calibration/coverage@0%": 0.036259588016304314, "calibration/coverage@1%": 0.036259588016304314, "calibration/coverage@10%": 0.11721307560118954, "calibration/coverage@15%": 0.3834662270669858, "calibration/coverage@20%": 0.4646737963842374, "calibration/coverage@25%": 0.6478996052484998, "calibration/coverage@30%": 0.8137590066730672, "calibration/coverage@5%": 0.059398362336180996, "calibration/ece": 0.11638557739382216, "calibration/mean_confidence": 0.6332810296217062, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004427083333333348, "completions/max_length": 3764.4, "completions/max_terminated_length": 3764.4, "completions/mean_length": 794.3666748046875, "completions/mean_terminated_length": 797.9120361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 225.6, "epoch": 0.2039974500318746, "grad_norm": 0.00044087867718189955, "learning_rate": 3.7048192771084342e-06, "loss": -0.0022, "num_tokens": 180465121.0, "reward": 1.0135318279266357, "reward_std": 0.13029464483261108, "rewards/accuracy_reward": 0.6832465171813965, "rewards/brier_reward": 0.8005435109138489, "rewards/confidence_uniqueness_reward": 0.9350399494171142, "rewards/format_reward": 0.9953993082046508, "rewards/frontier_coverage_0": -0.0016260695294477046, "rewards/frontier_coverage_1": -0.0016260695294477046, "rewards/frontier_coverage_10": -0.0008423494873568416, "rewards/frontier_coverage_15": 0.00238975181709975, "rewards/frontier_coverage_20": 0.011206477042287588, "rewards/frontier_coverage_25": 0.0376150730997324, "rewards/frontier_coverage_5": -0.0016260695294477046, "signal/accuracy_reward/centered_abs_mean": 0.17951931357383727, "signal/accuracy_reward/group_std_mean": 0.23702281415462495, "signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08975965678691863, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08975965678691863, "signal/advantage_abs_mean": 0.09476174563169479, "signal/advantage_pre_scale_abs_mean": 0.09476174563169479, "signal/advantage_pre_scale_std": 0.15883181095123292, "signal/advantage_std": 0.15883181095123292, "signal/brier_reward/centered_abs_mean": 0.11919141858816147, "signal/brier_reward/group_std_mean": 0.1569477528333664, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011919141374528408, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011919141374528408, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034977962449193004, "signal/confidence_uniqueness_reward/group_std_mean": 0.052396781742572784, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034977963194251062, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034977963194251062, "signal/format_reward/centered_abs_mean": 0.008707682136446238, "signal/format_reward/group_std_mean": 0.02160101868212223, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004353841068223119, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004353841068223119, "signal/frontier_coverage_0/centered_abs_mean": 0.11353013217449189, "signal/frontier_coverage_0/group_std_mean": 0.15329338610172272, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016234809532761573, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016234809532761573, "signal/frontier_coverage_1/centered_abs_mean": 0.11353013217449189, "signal/frontier_coverage_1/group_std_mean": 0.15329338610172272, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016234809532761573, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016234809532761573, "signal/frontier_coverage_10/centered_abs_mean": 0.11054201275110245, "signal/frontier_coverage_10/group_std_mean": 0.14934370666742325, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015807508490979672, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015807508490979672, "signal/frontier_coverage_15/centered_abs_mean": 0.09602530598640442, "signal/frontier_coverage_15/group_std_mean": 0.13076421320438386, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013731618179008364, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013731618179008364, "signal/frontier_coverage_20/centered_abs_mean": 0.058435800671577456, "signal/frontier_coverage_20/group_std_mean": 0.08124178051948547, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008356319856829941, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008356319856829941, "signal/frontier_coverage_25/centered_abs_mean": 0.045007632672786714, "signal/frontier_coverage_25/group_std_mean": 0.05981680378317833, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006436091498471797, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006436091498471797, "signal/frontier_coverage_5/centered_abs_mean": 0.11353013217449189, "signal/frontier_coverage_5/group_std_mean": 0.15329338610172272, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016234809532761573, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016234809532761573, "step": 85 }, { "calibration/aurc": 0.19130533906993302, "calibration/batch_distribution_entropy": 0.8412225489716348, "calibration/buffer_distribution_entropy": 0.8626075627761844, "calibration/confidence_entropy": 0.5053431016428965, "calibration/coverage@0%": 0.03859342792359489, "calibration/coverage@1%": 0.03859342792359489, "calibration/coverage@10%": 0.25211929092498037, "calibration/coverage@15%": 0.3443653155241082, "calibration/coverage@20%": 0.4048542125930009, "calibration/coverage@25%": 0.831518123679618, "calibration/coverage@30%": 0.9070597960870763, "calibration/coverage@5%": 0.09598164593926069, "calibration/ece": 0.0995040290382317, "calibration/mean_confidence": 0.6870986648020756, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006163194444444442, "completions/max_length": 3540.4, "completions/max_terminated_length": 3540.4, "completions/mean_length": 758.8940307617188, "completions/mean_terminated_length": 763.611083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 206.2, "epoch": 0.2159973000337496, "grad_norm": 0.0005134688108228147, "learning_rate": 3.5542168674698798e-06, "loss": -0.0033, "num_tokens": 192276252.0, "reward": 1.0077428221702576, "reward_std": 0.12586894929409026, "rewards/accuracy_reward": 0.6733506917953491, "rewards/brier_reward": 0.7962008833885192, "rewards/confidence_uniqueness_reward": 0.9313777089118958, "rewards/format_reward": 0.9938367962837219, "rewards/frontier_coverage_0": 0.004424169240519404, "rewards/frontier_coverage_1": 0.004424169240519404, "rewards/frontier_coverage_10": 0.005338566357386299, "rewards/frontier_coverage_15": 0.009187003783881664, "rewards/frontier_coverage_20": 0.01867530047893524, "rewards/frontier_coverage_25": 0.05060422196984291, "rewards/frontier_coverage_5": 0.004631689615052892, "signal/accuracy_reward/centered_abs_mean": 0.16591254472732545, "signal/accuracy_reward/group_std_mean": 0.21845885515213012, "signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08295627236366272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08295627236366272, "signal/advantage_abs_mean": 0.09228640496730804, "signal/advantage_pre_scale_abs_mean": 0.09228640496730804, "signal/advantage_pre_scale_std": 0.15987550914287568, "signal/advantage_std": 0.15987550914287568, "signal/brier_reward/centered_abs_mean": 0.117860808968544, "signal/brier_reward/group_std_mean": 0.1544253945350647, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011786081641912461, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011786081641912461, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03850234746932983, "signal/confidence_uniqueness_reward/group_std_mean": 0.056425672769546506, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038502346724271774, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038502346724271774, "signal/format_reward/centered_abs_mean": 0.011311848741024732, "signal/format_reward/group_std_mean": 0.02502160966396332, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005655924370512366, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005655924370512366, "signal/frontier_coverage_0/centered_abs_mean": 0.09827356785535812, "signal/frontier_coverage_0/group_std_mean": 0.13401113748550414, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001405311981216073, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001405311981216073, "signal/frontier_coverage_1/centered_abs_mean": 0.09827356785535812, "signal/frontier_coverage_1/group_std_mean": 0.13401113748550414, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001405311981216073, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001405311981216073, "signal/frontier_coverage_10/centered_abs_mean": 0.09452640563249588, "signal/frontier_coverage_10/group_std_mean": 0.12922739684581758, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013517276151105762, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013517276151105762, "signal/frontier_coverage_15/centered_abs_mean": 0.07233314439654351, "signal/frontier_coverage_15/group_std_mean": 0.10092607736587525, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010343640227802099, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010343640227802099, "signal/frontier_coverage_20/centered_abs_mean": 0.049670548737049104, "signal/frontier_coverage_20/group_std_mean": 0.06895174235105514, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007102888310328126, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007102888310328126, "signal/frontier_coverage_25/centered_abs_mean": 0.05155856236815452, "signal/frontier_coverage_25/group_std_mean": 0.067772376537323, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007372874300926924, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007372874300926924, "signal/frontier_coverage_5/centered_abs_mean": 0.09758316129446029, "signal/frontier_coverage_5/group_std_mean": 0.13312698602676393, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013954391703009605, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013954391703009605, "step": 90 }, { "calibration/aurc": 0.23218282614032365, "calibration/batch_distribution_entropy": 0.8237960782553888, "calibration/buffer_distribution_entropy": 0.8804108398849207, "calibration/confidence_entropy": 0.5243356933254755, "calibration/coverage@0%": 0.01052649005750661, "calibration/coverage@1%": 0.01052649005750661, "calibration/coverage@10%": 0.09966577080811616, "calibration/coverage@15%": 0.431924512069393, "calibration/coverage@20%": 0.617088188355147, "calibration/coverage@25%": 0.6702744892179752, "calibration/coverage@30%": 0.710721242177679, "calibration/coverage@5%": 0.03245860494001314, "calibration/ece": 0.1193284582303358, "calibration/mean_confidence": 0.6877564386884953, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006163194444444442, "completions/max_length": 3526.8, "completions/max_terminated_length": 3526.8, "completions/mean_length": 766.1975708007812, "completions/mean_terminated_length": 770.9320678710938, "completions/min_length": 0.0, "completions/min_terminated_length": 220.8, "epoch": 0.22799715003562457, "grad_norm": 0.0004157455696258694, "learning_rate": 3.4036144578313257e-06, "loss": -0.004, "num_tokens": 204194528.0, "reward": 1.0064563512802125, "reward_std": 0.12273335456848145, "rewards/accuracy_reward": 0.6696180582046509, "rewards/brier_reward": 0.7975351452827454, "rewards/confidence_uniqueness_reward": 0.9334475994110107, "rewards/format_reward": 0.9934895753860473, "rewards/frontier_coverage_0": 0.007734180334955454, "rewards/frontier_coverage_1": 0.007734180334955454, "rewards/frontier_coverage_10": 0.008875045739114285, "rewards/frontier_coverage_15": 0.013548683421686292, "rewards/frontier_coverage_20": 0.02269660122692585, "rewards/frontier_coverage_25": 0.05776782408356666, "rewards/frontier_coverage_5": 0.007814234215766191, "signal/accuracy_reward/centered_abs_mean": 0.1544704854488373, "signal/accuracy_reward/group_std_mean": 0.2076838880777359, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07723524272441865, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07723524272441865, "signal/advantage_abs_mean": 0.08850989192724228, "signal/advantage_pre_scale_abs_mean": 0.08850989192724228, "signal/advantage_pre_scale_std": 0.1567450851202011, "signal/advantage_std": 0.1567450851202011, "signal/brier_reward/centered_abs_mean": 0.11523358970880508, "signal/brier_reward/group_std_mean": 0.15127619802951814, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011523359268903733, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011523359268903733, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036900246143341066, "signal/confidence_uniqueness_reward/group_std_mean": 0.05565410703420639, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036900244653224946, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036900244653224946, "signal/format_reward/centered_abs_mean": 0.01131184899713844, "signal/format_reward/group_std_mean": 0.026015446335077286, "signal/format_reward/group_zero_std_frac": 0.8722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00565592449856922, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00565592449856922, "signal/frontier_coverage_0/centered_abs_mean": 0.09024225771427155, "signal/frontier_coverage_0/group_std_mean": 0.12343428432941436, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012904643081128597, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012904643081128597, "signal/frontier_coverage_1/centered_abs_mean": 0.09024225771427155, "signal/frontier_coverage_1/group_std_mean": 0.12343428432941436, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012904643081128597, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012904643081128597, "signal/frontier_coverage_10/centered_abs_mean": 0.08697677999734879, "signal/frontier_coverage_10/group_std_mean": 0.1192005679011345, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012437679572030902, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012437679572030902, "signal/frontier_coverage_15/centered_abs_mean": 0.06753548979759216, "signal/frontier_coverage_15/group_std_mean": 0.09412883222103119, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009657575283199549, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009657575283199549, "signal/frontier_coverage_20/centered_abs_mean": 0.047610755264759066, "signal/frontier_coverage_20/group_std_mean": 0.06612022668123245, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006808338337577879, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006808338337577879, "signal/frontier_coverage_25/centered_abs_mean": 0.05751822665333748, "signal/frontier_coverage_25/group_std_mean": 0.07561186105012893, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008225106517784298, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008225106517784298, "signal/frontier_coverage_5/centered_abs_mean": 0.08994513750076294, "signal/frontier_coverage_5/group_std_mean": 0.12307111024856568, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012862155679613351, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012862155679613351, "step": 95 }, { "calibration/aurc": 0.20759978881743368, "calibration/batch_distribution_entropy": 0.7872453416290367, "calibration/buffer_distribution_entropy": 0.851199244324374, "calibration/confidence_entropy": 0.5093936144746041, "calibration/coverage@0%": 0.012143117253085606, "calibration/coverage@1%": 0.012143117253085606, "calibration/coverage@10%": 0.10598727560385317, "calibration/coverage@15%": 0.2561893629834925, "calibration/coverage@20%": 0.49089280471040475, "calibration/coverage@25%": 0.6995251651659927, "calibration/coverage@30%": 0.8823003139129817, "calibration/coverage@5%": 0.031241260489159883, "calibration/ece": 0.0877942040353775, "calibration/mean_confidence": 0.7242433693125331, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01328125, "completions/max_length": 3636.2, "completions/max_terminated_length": 3636.2, "completions/mean_length": 782.308251953125, "completions/mean_terminated_length": 792.8037231445312, "completions/min_length": 0.0, "completions/min_terminated_length": 226.6, "epoch": 0.23999700003749952, "grad_norm": 0.00045198958832770586, "learning_rate": 3.2530120481927713e-06, "loss": -0.0101, "num_tokens": 216305791.0, "reward": 1.0077686071395875, "reward_std": 0.13463030606508256, "rewards/accuracy_reward": 0.6796875, "rewards/brier_reward": 0.7993229150772094, "rewards/confidence_uniqueness_reward": 0.9266997456550599, "rewards/format_reward": 0.9866319417953491, "rewards/frontier_coverage_0": 0.009804848302155732, "rewards/frontier_coverage_1": 0.009804848302155732, "rewards/frontier_coverage_10": 0.01042446969076991, "rewards/frontier_coverage_15": 0.012922577001154423, "rewards/frontier_coverage_20": 0.02126994784921408, "rewards/frontier_coverage_25": 0.06628896966576577, "rewards/frontier_coverage_5": 0.009804848302155732, "signal/accuracy_reward/centered_abs_mean": 0.16714409589767457, "signal/accuracy_reward/group_std_mean": 0.21892527341842652, "signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08357204794883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08357204794883728, "signal/advantage_abs_mean": 0.09943573027849198, "signal/advantage_pre_scale_abs_mean": 0.09943573027849198, "signal/advantage_pre_scale_std": 0.17322509586811066, "signal/advantage_std": 0.17322509586811066, "signal/brier_reward/centered_abs_mean": 0.12519195675849915, "signal/brier_reward/group_std_mean": 0.1623040735721588, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012519196048378945, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012519196048378945, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04277070388197899, "signal/confidence_uniqueness_reward/group_std_mean": 0.06450802609324455, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004277070425450802, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004277070425450802, "signal/format_reward/centered_abs_mean": 0.02023654468357563, "signal/format_reward/group_std_mean": 0.03803690262138844, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010118272341787814, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010118272341787814, "signal/frontier_coverage_0/centered_abs_mean": 0.08244038820266723, "signal/frontier_coverage_0/group_std_mean": 0.11372108608484269, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0011788975214585661, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011788975214585661, "signal/frontier_coverage_1/centered_abs_mean": 0.08244038820266723, "signal/frontier_coverage_1/group_std_mean": 0.11372108608484269, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0011788975214585661, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011788975214585661, "signal/frontier_coverage_10/centered_abs_mean": 0.07991492450237274, "signal/frontier_coverage_10/group_std_mean": 0.11050502359867095, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0011427834630012511, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011427834630012511, "signal/frontier_coverage_15/centered_abs_mean": 0.0665148988366127, "signal/frontier_coverage_15/group_std_mean": 0.09321689903736115, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009511630749329924, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009511630749329924, "signal/frontier_coverage_20/centered_abs_mean": 0.04453737959265709, "signal/frontier_coverage_20/group_std_mean": 0.06285597681999207, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006368845235556364, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006368845235556364, "signal/frontier_coverage_25/centered_abs_mean": 0.060834895074367526, "signal/frontier_coverage_25/group_std_mean": 0.08100210577249527, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008699389640241861, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008699389640241861, "signal/frontier_coverage_5/centered_abs_mean": 0.08244038820266723, "signal/frontier_coverage_5/group_std_mean": 0.11372108608484269, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0011788975214585661, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011788975214585661, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.13921226805258638, "eval_calibration/batch_distribution_entropy": 0.7191935689276043, "eval_calibration/buffer_distribution_entropy": 0.8323235519691029, "eval_calibration/confidence_entropy": 0.47122875449565105, "eval_calibration/coverage@0%": 0.16717069892473116, "eval_calibration/coverage@1%": 0.16717069892473116, "eval_calibration/coverage@10%": 0.46639784946236557, "eval_calibration/coverage@15%": 0.6330645161290323, "eval_calibration/coverage@20%": 0.837869623655914, "eval_calibration/coverage@25%": 0.9321236559139785, "eval_calibration/coverage@30%": 0.9635416666666666, "eval_calibration/coverage@5%": 0.24529569892473116, "eval_calibration/ece": 0.15433622692672344, "eval_calibration/mean_confidence": 0.7643868913528463, "eval_completions/clipped_ratio": 0.014756944444444456, "eval_completions/max_length": 2571.1666666666665, "eval_completions/max_terminated_length": 2571.1666666666665, "eval_completions/mean_length": 752.1073099772135, "eval_completions/mean_terminated_length": 763.3598937988281, "eval_completions/min_length": 104.16666666666667, "eval_completions/min_terminated_length": 271.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 216305791.0, "eval_reward": 0.9933919807275137, "eval_reward_std": 0.27725009868542355, "eval_rewards/accuracy_reward": 0.6710069477558136, "eval_rewards/brier_reward": 0.7916092475255331, "eval_rewards/confidence_uniqueness_reward": 0.8643936216831207, "eval_rewards/format_reward": 0.980902781089147, "eval_rewards/frontier_coverage_0": 0.011102605417060355, "eval_rewards/frontier_coverage_1": 0.011102605417060355, "eval_rewards/frontier_coverage_10": 0.011642855126410723, "eval_rewards/frontier_coverage_15": 0.012925609441784522, "eval_rewards/frontier_coverage_20": 0.017780127624670666, "eval_rewards/frontier_coverage_25": 0.05278685626884302, "eval_rewards/frontier_coverage_5": 0.011105729693857333, "eval_runtime": 206.9157, "eval_samples_per_second": 4.833, "eval_signal/accuracy_reward/centered_abs_mean": 0.4260525157054265, "eval_signal/accuracy_reward/group_std_mean": 0.4678276677926381, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21302625785271326, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21302625785271326, "eval_signal/advantage_abs_mean": 0.2413168102502823, "eval_signal/advantage_pre_scale_abs_mean": 0.2413168102502823, "eval_signal/advantage_pre_scale_std": 0.2765214368700981, "eval_signal/advantage_std": 0.2765214368700981, "eval_signal/brier_reward/centered_abs_mean": 0.2165469080209732, "eval_signal/brier_reward/group_std_mean": 0.26921579490105313, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021654691236714523, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021654691236714523, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06999108629922073, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11411779932677746, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006999108707532287, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006999108707532287, "eval_signal/format_reward/centered_abs_mean": 0.03613281218955914, "eval_signal/format_reward/group_std_mean": 0.08657800406217575, "eval_signal/format_reward/group_zero_std_frac": 0.583333338300387, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.01806640609477957, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.01806640609477957, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.08888960257172585, "eval_signal/frontier_coverage_0/group_std_mean": 0.14864219104250273, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012711213203147054, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012711213203147054, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.08888960257172585, "eval_signal/frontier_coverage_1/group_std_mean": 0.14864219104250273, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012711213203147054, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012711213203147054, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.08579947799444199, "eval_signal/frontier_coverage_10/group_std_mean": 0.14465376734733582, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012269325282735128, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012269325282735128, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.06808544136583805, "eval_signal/frontier_coverage_15/group_std_mean": 0.12035164733727773, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009736218586719284, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009736218586719284, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.04719839679698149, "eval_signal/frontier_coverage_20/group_std_mean": 0.08246325453122456, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006749370562223097, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006749370562223097, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.09537930289904277, "eval_signal/frontier_coverage_25/group_std_mean": 0.12517398471633592, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001363924064207822, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001363924064207822, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.08822002758582433, "eval_signal/frontier_coverage_5/group_std_mean": 0.1477730112771193, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012615464123276372, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012615464123276372, "eval_steps_per_second": 0.029, "step": 100 }, { "calibration/aurc": 0.3479992140027637, "calibration/batch_distribution_entropy": 0.750787770254411, "calibration/buffer_distribution_entropy": 0.8211954941825012, "calibration/confidence_entropy": 0.46615226530585224, "calibration/coverage@0%": 0.019922428534220153, "calibration/coverage@1%": 0.019922428534220153, "calibration/coverage@10%": 0.12016627849477383, "calibration/coverage@15%": 0.1617470000037748, "calibration/coverage@20%": 0.19590033859661704, "calibration/coverage@25%": 0.21370138571703592, "calibration/coverage@30%": 0.3333657334183914, "calibration/coverage@5%": 0.09898002015725679, "calibration/ece": 0.17690751887463851, "calibration/mean_confidence": 0.7663661401788909, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020659722222222232, "completions/max_length": 3665.8, "completions/max_terminated_length": 3665.8, "completions/mean_length": 753.8394897460937, "completions/mean_terminated_length": 769.899658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 205.6, "epoch": 0.2519968500393745, "grad_norm": 0.00047751839156262577, "learning_rate": 3.1024096385542172e-06, "loss": -0.017, "num_tokens": 228066886.0, "reward": 0.9950996160507202, "reward_std": 0.14413480460643768, "rewards/accuracy_reward": 0.6709201335906982, "rewards/brier_reward": 0.774049949645996, "rewards/confidence_uniqueness_reward": 0.9144436717033386, "rewards/format_reward": 0.9793402791023255, "rewards/frontier_coverage_0": 0.0029952601238619537, "rewards/frontier_coverage_1": 0.0029952601238619537, "rewards/frontier_coverage_10": 0.002985831905971281, "rewards/frontier_coverage_15": 0.005100842425599694, "rewards/frontier_coverage_20": 0.011829984840005636, "rewards/frontier_coverage_25": 0.04955209493637085, "rewards/frontier_coverage_5": 0.002863927249563858, "signal/accuracy_reward/centered_abs_mean": 0.16414388120174409, "signal/accuracy_reward/group_std_mean": 0.2145601123571396, "signal/accuracy_reward/group_zero_std_frac": 0.402777773141861, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08207194060087204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08207194060087204, "signal/advantage_abs_mean": 0.10552676767110825, "signal/advantage_pre_scale_abs_mean": 0.10552676767110825, "signal/advantage_pre_scale_std": 0.18652166426181793, "signal/advantage_std": 0.18652166426181793, "signal/brier_reward/centered_abs_mean": 0.13289882838726044, "signal/brier_reward/group_std_mean": 0.17235172688961028, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01328988280147314, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01328988280147314, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05428531989455223, "signal/confidence_uniqueness_reward/group_std_mean": 0.0814499482512474, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00542853195220232, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00542853195220232, "signal/format_reward/centered_abs_mean": 0.03229166679084301, "signal/format_reward/group_std_mean": 0.056061620265245436, "signal/format_reward/group_zero_std_frac": 0.7833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016145833395421506, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016145833395421506, "signal/frontier_coverage_0/centered_abs_mean": 0.06614647805690765, "signal/frontier_coverage_0/group_std_mean": 0.09260518848896027, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009458946529775858, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009458946529775858, "signal/frontier_coverage_1/centered_abs_mean": 0.06614647805690765, "signal/frontier_coverage_1/group_std_mean": 0.09260518848896027, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009458946529775858, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009458946529775858, "signal/frontier_coverage_10/centered_abs_mean": 0.06400079652667046, "signal/frontier_coverage_10/group_std_mean": 0.089921535551548, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009152113692834973, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009152113692834973, "signal/frontier_coverage_15/centered_abs_mean": 0.05834746509790421, "signal/frontier_coverage_15/group_std_mean": 0.08256930112838745, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00083436876302585, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00083436876302585, "signal/frontier_coverage_20/centered_abs_mean": 0.03862107619643211, "signal/frontier_coverage_20/group_std_mean": 0.055405861139297484, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005522813764400781, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005522813764400781, "signal/frontier_coverage_25/centered_abs_mean": 0.04953863024711609, "signal/frontier_coverage_25/group_std_mean": 0.06496639400720597, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007084024371579289, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007084024371579289, "signal/frontier_coverage_5/centered_abs_mean": 0.06591839194297791, "signal/frontier_coverage_5/group_std_mean": 0.09233485758304597, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009426330449059606, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009426330449059606, "step": 105 }, { "calibration/aurc": 0.24207440577624634, "calibration/batch_distribution_entropy": 0.6875765593814578, "calibration/buffer_distribution_entropy": 0.7860359646652466, "calibration/confidence_entropy": 0.4102154748788104, "calibration/coverage@0%": 0.022595843714871777, "calibration/coverage@1%": 0.022595843714871777, "calibration/coverage@10%": 0.2102467277237638, "calibration/coverage@15%": 0.2534104615446351, "calibration/coverage@20%": 0.3359725625062341, "calibration/coverage@25%": 0.5277390933544955, "calibration/coverage@30%": 0.5790548647469459, "calibration/coverage@5%": 0.11875024921629493, "calibration/ece": 0.15191207953492325, "calibration/mean_confidence": 0.800740167621276, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018663194444444465, "completions/max_length": 3719.2, "completions/max_terminated_length": 3719.2, "completions/mean_length": 743.8192993164063, "completions/mean_terminated_length": 758.0759155273438, "completions/min_length": 0.0, "completions/min_terminated_length": 211.2, "epoch": 0.2639967000412495, "grad_norm": 0.0006374148651957512, "learning_rate": 2.9518072289156627e-06, "loss": -0.0149, "num_tokens": 239744132.0, "reward": 1.0158015012741088, "reward_std": 0.13897253274917604, "rewards/accuracy_reward": 0.7074652791023255, "rewards/brier_reward": 0.7943165302276611, "rewards/confidence_uniqueness_reward": 0.9072112798690796, "rewards/format_reward": 0.98125, "rewards/frontier_coverage_0": 0.002520253928378224, "rewards/frontier_coverage_1": 0.002520253928378224, "rewards/frontier_coverage_10": 0.002841651951894164, "rewards/frontier_coverage_15": 0.0039699568413198, "rewards/frontier_coverage_20": 0.013109351228922605, "rewards/frontier_coverage_25": 0.0626706637442112, "rewards/frontier_coverage_5": 0.0026505836751312016, "signal/accuracy_reward/centered_abs_mean": 0.1546115458011627, "signal/accuracy_reward/group_std_mean": 0.21346699297428132, "signal/accuracy_reward/group_zero_std_frac": 0.3583333343267441, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07730577290058135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07730577290058135, "signal/advantage_abs_mean": 0.09841311872005462, "signal/advantage_pre_scale_abs_mean": 0.09841311872005462, "signal/advantage_pre_scale_std": 0.1800345003604889, "signal/advantage_std": 0.1800345003604889, "signal/brier_reward/centered_abs_mean": 0.12525416761636735, "signal/brier_reward/group_std_mean": 0.16696035861968994, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012525417283177376, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012525417283177376, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0548114612698555, "signal/confidence_uniqueness_reward/group_std_mean": 0.08105210959911346, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005481146275997162, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005481146275997162, "signal/format_reward/centered_abs_mean": 0.02778862789273262, "signal/format_reward/group_std_mean": 0.04899119287729263, "signal/format_reward/group_zero_std_frac": 0.8055555820465088, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01389431394636631, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01389431394636631, "signal/frontier_coverage_0/centered_abs_mean": 0.056989597529172896, "signal/frontier_coverage_0/group_std_mean": 0.08076736629009247, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0008149512344971299, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008149512344971299, "signal/frontier_coverage_1/centered_abs_mean": 0.056989597529172896, "signal/frontier_coverage_1/group_std_mean": 0.08076736629009247, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0008149512344971299, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008149512344971299, "signal/frontier_coverage_10/centered_abs_mean": 0.05564908087253571, "signal/frontier_coverage_10/group_std_mean": 0.07900142818689346, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007957818452268839, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007957818452268839, "signal/frontier_coverage_15/centered_abs_mean": 0.0526436798274517, "signal/frontier_coverage_15/group_std_mean": 0.0750760056078434, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007528046262450516, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007528046262450516, "signal/frontier_coverage_20/centered_abs_mean": 0.037152212113142014, "signal/frontier_coverage_20/group_std_mean": 0.05306043922901153, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005312766588758677, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005312766588758677, "signal/frontier_coverage_25/centered_abs_mean": 0.04958587661385536, "signal/frontier_coverage_25/group_std_mean": 0.06510179191827774, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000709078018553555, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000709078018553555, "signal/frontier_coverage_5/centered_abs_mean": 0.05671231150627136, "signal/frontier_coverage_5/group_std_mean": 0.08039210587739945, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008109860471449792, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008109860471449792, "step": 110 }, { "calibration/aurc": 0.33356083398146347, "calibration/batch_distribution_entropy": 0.6959018763618702, "calibration/buffer_distribution_entropy": 0.7522820617818654, "calibration/confidence_entropy": 0.4518816489007994, "calibration/coverage@0%": 0.0032267264856438273, "calibration/coverage@1%": 0.0032267264856438273, "calibration/coverage@10%": 0.044937956432167886, "calibration/coverage@15%": 0.08441266677084883, "calibration/coverage@20%": 0.15283620421861482, "calibration/coverage@25%": 0.3622388517951728, "calibration/coverage@30%": 0.5879621018367821, "calibration/coverage@5%": 0.0032267264856438273, "calibration/ece": 0.20717975780837206, "calibration/mean_confidence": 0.7802978326710153, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02144097222222221, "completions/max_length": 3724.6, "completions/max_terminated_length": 3724.6, "completions/mean_length": 718.7766479492187, "completions/mean_terminated_length": 734.714306640625, "completions/min_length": 0.0, "completions/min_terminated_length": 199.2, "epoch": 0.27599655004312446, "grad_norm": 0.0006048035575076938, "learning_rate": 2.8012048192771087e-06, "loss": -0.0178, "num_tokens": 251103639.0, "reward": 0.9875237822532654, "reward_std": 0.1449933499097824, "rewards/accuracy_reward": 0.6601562619209289, "rewards/brier_reward": 0.7643703937530517, "rewards/confidence_uniqueness_reward": 0.9071934223175049, "rewards/format_reward": 0.9785590291023254, "rewards/frontier_coverage_0": 0.004726332519203425, "rewards/frontier_coverage_1": 0.004726332519203425, "rewards/frontier_coverage_10": 0.005253351200371981, "rewards/frontier_coverage_15": 0.005590797681361437, "rewards/frontier_coverage_20": 0.008738029189407826, "rewards/frontier_coverage_25": 0.03678738847374916, "rewards/frontier_coverage_5": 0.004789900593459606, "signal/accuracy_reward/centered_abs_mean": 0.1631022125482559, "signal/accuracy_reward/group_std_mean": 0.2127610206604004, "signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08155110627412795, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08155110627412795, "signal/advantage_abs_mean": 0.10652477592229843, "signal/advantage_pre_scale_abs_mean": 0.10652477592229843, "signal/advantage_pre_scale_std": 0.18951753973960878, "signal/advantage_std": 0.18951753973960878, "signal/brier_reward/centered_abs_mean": 0.13075682073831557, "signal/brier_reward/group_std_mean": 0.17059859931468963, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013075682520866393, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013075682520866393, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05514480024576187, "signal/confidence_uniqueness_reward/group_std_mean": 0.08304563462734223, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005514480173587799, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005514480173587799, "signal/format_reward/centered_abs_mean": 0.03312174491584301, "signal/format_reward/group_std_mean": 0.05764241740107536, "signal/format_reward/group_zero_std_frac": 0.7777777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016560872457921504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016560872457921504, "signal/frontier_coverage_0/centered_abs_mean": 0.051223869621753695, "signal/frontier_coverage_0/group_std_mean": 0.07264740690588951, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0007325013517402113, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0007325013517402113, "signal/frontier_coverage_1/centered_abs_mean": 0.051223869621753695, "signal/frontier_coverage_1/group_std_mean": 0.07264740690588951, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007325013517402113, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007325013517402113, "signal/frontier_coverage_10/centered_abs_mean": 0.04941959977149964, "signal/frontier_coverage_10/group_std_mean": 0.07031489610671997, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007067002821713686, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007067002821713686, "signal/frontier_coverage_15/centered_abs_mean": 0.04780538156628609, "signal/frontier_coverage_15/group_std_mean": 0.06820949018001557, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0006836169632151723, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0006836169632151723, "signal/frontier_coverage_20/centered_abs_mean": 0.03383687846362591, "signal/frontier_coverage_20/group_std_mean": 0.0490049920976162, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004838673456106335, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004838673456106335, "signal/frontier_coverage_25/centered_abs_mean": 0.04597809240221977, "signal/frontier_coverage_25/group_std_mean": 0.06020479202270508, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006574866769369691, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006574866769369691, "signal/frontier_coverage_5/centered_abs_mean": 0.05090032443404198, "signal/frontier_coverage_5/group_std_mean": 0.07223524823784828, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007278746110387146, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007278746110387146, "step": 115 }, { "calibration/aurc": 0.31065150593398905, "calibration/batch_distribution_entropy": 0.7314189320033493, "calibration/buffer_distribution_entropy": 0.7229353609548825, "calibration/confidence_entropy": 0.4617376720170327, "calibration/coverage@0%": 0.018317815283684445, "calibration/coverage@1%": 0.018317815283684445, "calibration/coverage@10%": 0.0586319514093389, "calibration/coverage@15%": 0.1323828570454714, "calibration/coverage@20%": 0.20926282095672583, "calibration/coverage@25%": 0.5352344590321512, "calibration/coverage@30%": 0.5989735572236128, "calibration/coverage@5%": 0.022506296959077115, "calibration/ece": 0.1755218508227229, "calibration/mean_confidence": 0.7753019329613121, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015451388888888862, "completions/max_length": 3661.4, "completions/max_terminated_length": 3661.4, "completions/mean_length": 682.734814453125, "completions/mean_terminated_length": 693.4420043945313, "completions/min_length": 0.0, "completions/min_terminated_length": 205.6, "epoch": 0.28799640004499943, "grad_norm": 0.0003586419625207782, "learning_rate": 2.6506024096385547e-06, "loss": -0.0125, "num_tokens": 262050600.0, "reward": 1.0042026162147522, "reward_std": 0.13399964123964309, "rewards/accuracy_reward": 0.6789930582046508, "rewards/brier_reward": 0.7894548892974853, "rewards/confidence_uniqueness_reward": 0.9230864763259887, "rewards/format_reward": 0.9845486044883728, "rewards/frontier_coverage_0": 0.007614323310554028, "rewards/frontier_coverage_1": 0.007614323310554028, "rewards/frontier_coverage_10": 0.007986792828887701, "rewards/frontier_coverage_15": 0.009333854354918004, "rewards/frontier_coverage_20": 0.01230423217639327, "rewards/frontier_coverage_25": 0.02976319268345833, "rewards/frontier_coverage_5": 0.007735726609826088, "signal/accuracy_reward/centered_abs_mean": 0.15657551884651183, "signal/accuracy_reward/group_std_mean": 0.20762513875961303, "signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07828775942325591, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07828775942325591, "signal/advantage_abs_mean": 0.09784245938062668, "signal/advantage_pre_scale_abs_mean": 0.09784245938062668, "signal/advantage_pre_scale_std": 0.17648713588714598, "signal/advantage_std": 0.17648713588714598, "signal/brier_reward/centered_abs_mean": 0.12497896701097488, "signal/brier_reward/group_std_mean": 0.16387878954410554, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012497896514832973, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012497896514832973, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04354229345917702, "signal/confidence_uniqueness_reward/group_std_mean": 0.06585515961050988, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004354229662567377, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004354229662567377, "signal/format_reward/centered_abs_mean": 0.02369791679084301, "signal/format_reward/group_std_mean": 0.04182791784405708, "signal/format_reward/group_zero_std_frac": 0.8361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011848958395421504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011848958395421504, "signal/frontier_coverage_0/centered_abs_mean": 0.06305849850177765, "signal/frontier_coverage_0/group_std_mean": 0.08656549751758576, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009017364820465446, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009017364820465446, "signal/frontier_coverage_1/centered_abs_mean": 0.06305849850177765, "signal/frontier_coverage_1/group_std_mean": 0.08656549751758576, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009017364820465446, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009017364820465446, "signal/frontier_coverage_10/centered_abs_mean": 0.06107858419418335, "signal/frontier_coverage_10/group_std_mean": 0.08405377566814423, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0008734237751923501, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008734237751923501, "signal/frontier_coverage_15/centered_abs_mean": 0.05693260729312897, "signal/frontier_coverage_15/group_std_mean": 0.07869968116283417, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008141362806782127, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008141362806782127, "signal/frontier_coverage_20/centered_abs_mean": 0.041933455318212506, "signal/frontier_coverage_20/group_std_mean": 0.05869346261024475, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005996484076604247, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005996484076604247, "signal/frontier_coverage_25/centered_abs_mean": 0.04046922326087952, "signal/frontier_coverage_25/group_std_mean": 0.053788629919290544, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005787098547443747, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005787098547443747, "signal/frontier_coverage_5/centered_abs_mean": 0.06229802295565605, "signal/frontier_coverage_5/group_std_mean": 0.08560123592615128, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008908617543056607, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008908617543056607, "step": 120 }, { "calibration/aurc": 0.2082183569339026, "calibration/batch_distribution_entropy": 0.8211991327882566, "calibration/buffer_distribution_entropy": 0.7568997286321094, "calibration/confidence_entropy": 0.5527962206752399, "calibration/coverage@0%": 0.022027035701369612, "calibration/coverage@1%": 0.022027035701369612, "calibration/coverage@10%": 0.29937981558557936, "calibration/coverage@15%": 0.3620036508586016, "calibration/coverage@20%": 0.4019900773574, "calibration/coverage@25%": 0.6806566429667174, "calibration/coverage@30%": 0.7411075469279128, "calibration/coverage@5%": 0.08646273123942735, "calibration/ece": 0.12462517933287802, "calibration/mean_confidence": 0.6664884744206967, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014496527777777768, "completions/max_length": 3502.2, "completions/max_terminated_length": 3502.2, "completions/mean_length": 670.2757080078125, "completions/mean_terminated_length": 680.2117797851563, "completions/min_length": 0.0, "completions/min_terminated_length": 183.4, "epoch": 0.2999962500468744, "grad_norm": 0.0004161894030403346, "learning_rate": 2.5e-06, "loss": -0.0132, "num_tokens": 272889840.0, "reward": 1.005896532535553, "reward_std": 0.12880902737379074, "rewards/accuracy_reward": 0.6809027791023254, "rewards/brier_reward": 0.7915880799293518, "rewards/confidence_uniqueness_reward": 0.9347058057785034, "rewards/format_reward": 0.9855034708976745, "rewards/frontier_coverage_0": -0.006131393508985639, "rewards/frontier_coverage_1": -0.006131393508985639, "rewards/frontier_coverage_10": -0.004654986085370183, "rewards/frontier_coverage_15": -0.0017717648821417241, "rewards/frontier_coverage_20": 0.002877543866634369, "rewards/frontier_coverage_25": 0.02585282623767853, "rewards/frontier_coverage_5": -0.005564809101633728, "signal/accuracy_reward/centered_abs_mean": 0.15927734076976777, "signal/accuracy_reward/group_std_mean": 0.20954229235649108, "signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07963867038488388, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07963867038488388, "signal/advantage_abs_mean": 0.0956741526722908, "signal/advantage_pre_scale_abs_mean": 0.0956741526722908, "signal/advantage_pre_scale_std": 0.16752077937126159, "signal/advantage_std": 0.16752077937126159, "signal/brier_reward/centered_abs_mean": 0.11655332297086715, "signal/brier_reward/group_std_mean": 0.1505295991897583, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011655332706868648, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011655332706868648, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04199672415852547, "signal/confidence_uniqueness_reward/group_std_mean": 0.06219554841518402, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004199672443792224, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004199672443792224, "signal/format_reward/centered_abs_mean": 0.023106553591787815, "signal/format_reward/group_std_mean": 0.03924813717603683, "signal/format_reward/group_zero_std_frac": 0.8499999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011553276795893908, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011553276795893908, "signal/frontier_coverage_0/centered_abs_mean": 0.10545411854982376, "signal/frontier_coverage_0/group_std_mean": 0.13934148699045182, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001507993880659342, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001507993880659342, "signal/frontier_coverage_1/centered_abs_mean": 0.10545411854982376, "signal/frontier_coverage_1/group_std_mean": 0.13934148699045182, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001507993880659342, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001507993880659342, "signal/frontier_coverage_10/centered_abs_mean": 0.10189146101474762, "signal/frontier_coverage_10/group_std_mean": 0.1348419487476349, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014570478349924087, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014570478349924087, "signal/frontier_coverage_15/centered_abs_mean": 0.09459190368652344, "signal/frontier_coverage_15/group_std_mean": 0.12573918104171752, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013526642229408025, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013526642229408025, "signal/frontier_coverage_20/centered_abs_mean": 0.07673133313655853, "signal/frontier_coverage_20/group_std_mean": 0.1030562549829483, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010972580406814814, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010972580406814814, "signal/frontier_coverage_25/centered_abs_mean": 0.04959097653627396, "signal/frontier_coverage_25/group_std_mean": 0.06758146658539772, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007091509876772761, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007091509876772761, "signal/frontier_coverage_5/centered_abs_mean": 0.10424444675445557, "signal/frontier_coverage_5/group_std_mean": 0.1378079980611801, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014906955417245626, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014906955417245626, "step": 125 }, { "calibration/aurc": 0.26075673968720536, "calibration/batch_distribution_entropy": 0.874285034631829, "calibration/buffer_distribution_entropy": 0.8284399180154496, "calibration/confidence_entropy": 0.5473131223715086, "calibration/coverage@0%": 0.024374274739239032, "calibration/coverage@1%": 0.024374274739239032, "calibration/coverage@10%": 0.13814292939717276, "calibration/coverage@15%": 0.2927289784951862, "calibration/coverage@20%": 0.3928601633049443, "calibration/coverage@25%": 0.46272417076934397, "calibration/coverage@30%": 0.6714604813734013, "calibration/coverage@5%": 0.04966937264230805, "calibration/ece": 0.12237342404558275, "calibration/mean_confidence": 0.626171849921876, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021440972222222254, "completions/max_length": 3600.2, "completions/max_terminated_length": 3600.2, "completions/mean_length": 685.9378540039063, "completions/mean_terminated_length": 701.0846801757813, "completions/min_length": 0.0, "completions/min_terminated_length": 168.2, "epoch": 0.3119961000487494, "grad_norm": 0.0004247704928275198, "learning_rate": 2.349397590361446e-06, "loss": -0.0169, "num_tokens": 283916644.0, "reward": 0.9914659857749939, "reward_std": 0.14033911675214766, "rewards/accuracy_reward": 0.6625868201255798, "rewards/brier_reward": 0.7736161351203918, "rewards/confidence_uniqueness_reward": 0.9349322438240051, "rewards/format_reward": 0.9785590291023254, "rewards/frontier_coverage_0": -0.010037094075232744, "rewards/frontier_coverage_1": -0.010037094075232744, "rewards/frontier_coverage_10": -0.008441044599749148, "rewards/frontier_coverage_15": -0.00335610918700695, "rewards/frontier_coverage_20": 0.008132204459980131, "rewards/frontier_coverage_25": 0.03618508372455835, "rewards/frontier_coverage_5": -0.009772640746086836, "signal/accuracy_reward/centered_abs_mean": 0.17634005844593048, "signal/accuracy_reward/group_std_mean": 0.23339370787143707, "signal/accuracy_reward/group_zero_std_frac": 0.3388888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08817002922296524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08817002922296524, "signal/advantage_abs_mean": 0.10400059223175048, "signal/advantage_pre_scale_abs_mean": 0.10400059223175048, "signal/advantage_pre_scale_std": 0.17410335540771485, "signal/advantage_std": 0.17410335540771485, "signal/brier_reward/centered_abs_mean": 0.12626205682754515, "signal/brier_reward/group_std_mean": 0.16308861076831818, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012626205757260322, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012626205757260322, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04283556342124939, "signal/confidence_uniqueness_reward/group_std_mean": 0.06413544788956642, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0042835562489926815, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042835562489926815, "signal/format_reward/centered_abs_mean": 0.027652995474636555, "signal/format_reward/group_std_mean": 0.045563656091690066, "signal/format_reward/group_zero_std_frac": 0.8277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013826497737318278, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013826497737318278, "signal/frontier_coverage_0/centered_abs_mean": 0.13005276918411254, "signal/frontier_coverage_0/group_std_mean": 0.17312212884426117, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001859754603356123, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001859754603356123, "signal/frontier_coverage_1/centered_abs_mean": 0.13005276918411254, "signal/frontier_coverage_1/group_std_mean": 0.17312212884426117, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001859754603356123, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001859754603356123, "signal/frontier_coverage_10/centered_abs_mean": 0.12581277936697005, "signal/frontier_coverage_10/group_std_mean": 0.16772884130477905, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017991228029131888, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017991228029131888, "signal/frontier_coverage_15/centered_abs_mean": 0.11518070250749587, "signal/frontier_coverage_15/group_std_mean": 0.1540255665779114, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016470840433612465, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016470840433612465, "signal/frontier_coverage_20/centered_abs_mean": 0.08615544736385346, "signal/frontier_coverage_20/group_std_mean": 0.11641546934843064, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012320228852331638, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012320228852331638, "signal/frontier_coverage_25/centered_abs_mean": 0.05977813303470612, "signal/frontier_coverage_25/group_std_mean": 0.07970146983861923, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008548272890038788, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008548272890038788, "signal/frontier_coverage_5/centered_abs_mean": 0.12958541363477707, "signal/frontier_coverage_5/group_std_mean": 0.1725340485572815, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001853071292862296, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001853071292862296, "step": 130 }, { "calibration/aurc": 0.2546360563314921, "calibration/batch_distribution_entropy": 0.8330176576430549, "calibration/buffer_distribution_entropy": 0.8734325642612234, "calibration/confidence_entropy": 0.4818107536798132, "calibration/coverage@0%": 0.014179309586631486, "calibration/coverage@1%": 0.014179309586631486, "calibration/coverage@10%": 0.23271767810026386, "calibration/coverage@15%": 0.26649076517150394, "calibration/coverage@20%": 0.433201793135017, "calibration/coverage@25%": 0.5304986738889623, "calibration/coverage@30%": 0.5728792386450393, "calibration/coverage@5%": 0.1934078166226913, "calibration/ece": 0.1538258270193908, "calibration/mean_confidence": 0.7085402165749761, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011892361111111093, "completions/max_length": 3711.6, "completions/max_terminated_length": 3711.6, "completions/mean_length": 664.8523559570312, "completions/mean_terminated_length": 672.9011596679687, "completions/min_length": 0.0, "completions/min_terminated_length": 161.8, "epoch": 0.32399595005062437, "grad_norm": 0.000490625505335629, "learning_rate": 2.1987951807228917e-06, "loss": -0.0101, "num_tokens": 294668767.0, "reward": 1.0130939960479737, "reward_std": 0.1343725234270096, "rewards/accuracy_reward": 0.6880208373069763, "rewards/brier_reward": 0.7956640720367432, "rewards/confidence_uniqueness_reward": 0.9440826296806335, "rewards/format_reward": 0.9880208373069763, "rewards/frontier_coverage_0": -0.0007627993822097778, "rewards/frontier_coverage_1": -0.0007627993822097778, "rewards/frontier_coverage_10": 0.0009573293849825859, "rewards/frontier_coverage_15": 0.004394118906930089, "rewards/frontier_coverage_20": 0.015732752112671732, "rewards/frontier_coverage_25": 0.05771690420806408, "rewards/frontier_coverage_5": -0.000457253772765398, "signal/accuracy_reward/centered_abs_mean": 0.16697049140930176, "signal/accuracy_reward/group_std_mean": 0.223639115691185, "signal/accuracy_reward/group_zero_std_frac": 0.35555556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08348524570465088, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08348524570465088, "signal/advantage_abs_mean": 0.09535450786352158, "signal/advantage_pre_scale_abs_mean": 0.09535450786352158, "signal/advantage_pre_scale_std": 0.16756429374217988, "signal/advantage_std": 0.16756429374217988, "signal/brier_reward/centered_abs_mean": 0.12386199980974197, "signal/brier_reward/group_std_mean": 0.16375071704387664, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012386200204491615, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012386200204491615, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.035422375053167345, "signal/confidence_uniqueness_reward/group_std_mean": 0.059968823194503786, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00354223744943738, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00354223744943738, "signal/format_reward/centered_abs_mean": 0.02140842005610466, "signal/format_reward/group_std_mean": 0.04312895014882088, "signal/format_reward/group_zero_std_frac": 0.8166666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01070421002805233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01070421002805233, "signal/frontier_coverage_0/centered_abs_mean": 0.11899998188018798, "signal/frontier_coverage_0/group_std_mean": 0.15951877534389497, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001701699779368937, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001701699779368937, "signal/frontier_coverage_1/centered_abs_mean": 0.11899998188018798, "signal/frontier_coverage_1/group_std_mean": 0.15951877534389497, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001701699779368937, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001701699779368937, "signal/frontier_coverage_10/centered_abs_mean": 0.11335770487785339, "signal/frontier_coverage_10/group_std_mean": 0.1522216647863388, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001621015160344541, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001621015160344541, "signal/frontier_coverage_15/centered_abs_mean": 0.10240471959114075, "signal/frontier_coverage_15/group_std_mean": 0.13803330510854722, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014643874485045672, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014643874485045672, "signal/frontier_coverage_20/centered_abs_mean": 0.0672150082886219, "signal/frontier_coverage_20/group_std_mean": 0.0915078029036522, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009611746412701905, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009611746412701905, "signal/frontier_coverage_25/centered_abs_mean": 0.06437275260686874, "signal/frontier_coverage_25/group_std_mean": 0.08481907844543457, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009205303387716413, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009205303387716413, "signal/frontier_coverage_5/centered_abs_mean": 0.11798569560050964, "signal/frontier_coverage_5/group_std_mean": 0.15821486115455627, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00168719538487494, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00168719538487494, "step": 135 }, { "calibration/aurc": 0.1854829328737862, "calibration/batch_distribution_entropy": 0.7996682424965277, "calibration/buffer_distribution_entropy": 0.8905462538846519, "calibration/confidence_entropy": 0.41854264390855284, "calibration/coverage@0%": 0.04533180574298995, "calibration/coverage@1%": 0.04533180574298995, "calibration/coverage@10%": 0.24171738663204975, "calibration/coverage@15%": 0.3580652283340696, "calibration/coverage@20%": 0.5067775705274844, "calibration/coverage@25%": 0.7254039905217915, "calibration/coverage@30%": 0.956267313498989, "calibration/coverage@5%": 0.12600271378231903, "calibration/ece": 0.12841547965995068, "calibration/mean_confidence": 0.7517699707017254, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013541666666666674, "completions/max_length": 3653.8, "completions/max_terminated_length": 3653.8, "completions/mean_length": 649.6119018554688, "completions/mean_terminated_length": 658.50859375, "completions/min_length": 0.0, "completions/min_terminated_length": 165.4, "epoch": 0.33599580005249935, "grad_norm": 0.0004042711516376585, "learning_rate": 2.0481927710843377e-06, "loss": -0.0109, "num_tokens": 305256520.0, "reward": 1.0060909271240235, "reward_std": 0.12792308628559113, "rewards/accuracy_reward": 0.6741319417953491, "rewards/brier_reward": 0.7920880436897277, "rewards/confidence_uniqueness_reward": 0.9383025646209717, "rewards/format_reward": 0.9864583373069763, "rewards/frontier_coverage_0": 0.014161212788894772, "rewards/frontier_coverage_1": 0.014161212788894772, "rewards/frontier_coverage_10": 0.014498895592987537, "rewards/frontier_coverage_15": 0.014886665157973766, "rewards/frontier_coverage_20": 0.027863727882504463, "rewards/frontier_coverage_25": 0.09292519390583039, "rewards/frontier_coverage_5": 0.014280988043174148, "signal/accuracy_reward/centered_abs_mean": 0.15073784589767455, "signal/accuracy_reward/group_std_mean": 0.2007976531982422, "signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07536892294883728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07536892294883728, "signal/advantage_abs_mean": 0.09263349771499634, "signal/advantage_pre_scale_abs_mean": 0.09263349771499634, "signal/advantage_pre_scale_std": 0.1665874868631363, "signal/advantage_std": 0.1665874868631363, "signal/brier_reward/centered_abs_mean": 0.12627332657575607, "signal/brier_reward/group_std_mean": 0.16518832445144654, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012627332285046578, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012627332285046578, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037095585465431215, "signal/confidence_uniqueness_reward/group_std_mean": 0.06054994612932205, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003709558630362153, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003709558630362153, "signal/format_reward/centered_abs_mean": 0.021560330502688886, "signal/format_reward/group_std_mean": 0.041833048313856126, "signal/format_reward/group_zero_std_frac": 0.8194444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010780165251344443, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010780165251344443, "signal/frontier_coverage_0/centered_abs_mean": 0.09921992719173431, "signal/frontier_coverage_0/group_std_mean": 0.13895856738090515, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014188449829816818, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014188449829816818, "signal/frontier_coverage_1/centered_abs_mean": 0.09921992719173431, "signal/frontier_coverage_1/group_std_mean": 0.13895856738090515, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014188449829816818, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014188449829816818, "signal/frontier_coverage_10/centered_abs_mean": 0.09619618356227874, "signal/frontier_coverage_10/group_std_mean": 0.13501449525356293, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013756054220721125, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013756054220721125, "signal/frontier_coverage_15/centered_abs_mean": 0.08267004191875457, "signal/frontier_coverage_15/group_std_mean": 0.1172051951289177, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011821816442534328, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011821816442534328, "signal/frontier_coverage_20/centered_abs_mean": 0.049167075753211976, "signal/frontier_coverage_20/group_std_mean": 0.06892770677804946, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000703089137095958, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000703089137095958, "signal/frontier_coverage_25/centered_abs_mean": 0.08086840957403182, "signal/frontier_coverage_25/group_std_mean": 0.10296626091003418, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011564183048903942, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011564183048903942, "signal/frontier_coverage_5/centered_abs_mean": 0.09877839088439941, "signal/frontier_coverage_5/group_std_mean": 0.13836795836687088, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001412531011737883, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001412531011737883, "step": 140 }, { "calibration/aurc": 0.1974786325318561, "calibration/batch_distribution_entropy": 0.807348167009032, "calibration/buffer_distribution_entropy": 0.8525369126779999, "calibration/confidence_entropy": 0.41936921197900173, "calibration/coverage@0%": 0.020484134148034995, "calibration/coverage@1%": 0.020484134148034995, "calibration/coverage@10%": 0.12842496038935974, "calibration/coverage@15%": 0.3236565417377872, "calibration/coverage@20%": 0.635893543720149, "calibration/coverage@25%": 0.7277863040288939, "calibration/coverage@30%": 0.8788835787859582, "calibration/coverage@5%": 0.055793119011248435, "calibration/ece": 0.12143313418001216, "calibration/mean_confidence": 0.741192283313203, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01067708333333337, "completions/max_length": 3261.6, "completions/max_terminated_length": 3261.6, "completions/mean_length": 632.3050537109375, "completions/mean_terminated_length": 639.30107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 182.4, "epoch": 0.34799565005437433, "grad_norm": 0.00040693863411433995, "learning_rate": 1.8975903614457832e-06, "loss": -0.0105, "num_tokens": 315605282.0, "reward": 1.0286210775375366, "reward_std": 0.11897408664226532, "rewards/accuracy_reward": 0.7124131917953491, "rewards/brier_reward": 0.8128675818443298, "rewards/confidence_uniqueness_reward": 0.9287760734558106, "rewards/format_reward": 0.9893229246139527, "rewards/frontier_coverage_0": 0.01579418806359172, "rewards/frontier_coverage_1": 0.01579418806359172, "rewards/frontier_coverage_10": 0.016339881264138968, "rewards/frontier_coverage_15": 0.01718453587964177, "rewards/frontier_coverage_20": 0.03750094771385193, "rewards/frontier_coverage_25": 0.1325998529791832, "rewards/frontier_coverage_5": 0.015739528834819792, "signal/accuracy_reward/centered_abs_mean": 0.137353515625, "signal/accuracy_reward/group_std_mean": 0.18777381181716918, "signal/accuracy_reward/group_zero_std_frac": 0.4361111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686767578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0686767578125, "signal/advantage_abs_mean": 0.08469511717557907, "signal/advantage_pre_scale_abs_mean": 0.08469511717557907, "signal/advantage_pre_scale_std": 0.1597517877817154, "signal/advantage_std": 0.1597517877817154, "signal/brier_reward/centered_abs_mean": 0.12741477489471437, "signal/brier_reward/group_std_mean": 0.16703042685985564, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012741477787494659, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012741477787494659, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04068734273314476, "signal/confidence_uniqueness_reward/group_std_mean": 0.05980287864804268, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004068734264001251, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004068734264001251, "signal/format_reward/centered_abs_mean": 0.018288845382630824, "signal/format_reward/group_std_mean": 0.03258528374135494, "signal/format_reward/group_zero_std_frac": 0.8750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009144422691315412, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009144422691315412, "signal/frontier_coverage_0/centered_abs_mean": 0.08689655661582947, "signal/frontier_coverage_0/group_std_mean": 0.11893046051263809, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00124262070748955, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00124262070748955, "signal/frontier_coverage_1/centered_abs_mean": 0.08689655661582947, "signal/frontier_coverage_1/group_std_mean": 0.11893046051263809, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00124262070748955, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00124262070748955, "signal/frontier_coverage_10/centered_abs_mean": 0.08449746146798134, "signal/frontier_coverage_10/group_std_mean": 0.11587611138820648, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012083137058652937, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012083137058652937, "signal/frontier_coverage_15/centered_abs_mean": 0.07269451022148132, "signal/frontier_coverage_15/group_std_mean": 0.1005746454000473, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010395315941423178, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010395315941423178, "signal/frontier_coverage_20/centered_abs_mean": 0.04770020917057991, "signal/frontier_coverage_20/group_std_mean": 0.0634695328772068, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006821130053140223, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006821130053140223, "signal/frontier_coverage_25/centered_abs_mean": 0.08773275762796402, "signal/frontier_coverage_25/group_std_mean": 0.11324829757213592, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012545783771201967, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012545783771201967, "signal/frontier_coverage_5/centered_abs_mean": 0.08658337146043778, "signal/frontier_coverage_5/group_std_mean": 0.11853417456150055, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012381422566249967, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012381422566249967, "step": 145 }, { "calibration/aurc": 0.2009286800964662, "calibration/batch_distribution_entropy": 0.8164642374302046, "calibration/buffer_distribution_entropy": 0.8201398178798591, "calibration/confidence_entropy": 0.4014055039722427, "calibration/coverage@0%": 0.010666666666666668, "calibration/coverage@1%": 0.010666666666666668, "calibration/coverage@10%": 0.36389817290552584, "calibration/coverage@15%": 0.4667266934046346, "calibration/coverage@20%": 0.5943920900178253, "calibration/coverage@25%": 0.6405080213903743, "calibration/coverage@30%": 0.6833778966131907, "calibration/coverage@5%": 0.09828333333333332, "calibration/ece": 0.15416181123167888, "calibration/mean_confidence": 0.7122128912211364, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009722222222222233, "completions/max_length": 3621.2, "completions/max_terminated_length": 3621.2, "completions/mean_length": 713.7203979492188, "completions/mean_terminated_length": 720.7264038085938, "completions/min_length": 0.0, "completions/min_terminated_length": 178.2, "epoch": 0.3599955000562493, "grad_norm": 0.0004712261143140495, "learning_rate": 1.7469879518072292e-06, "loss": -0.0083, "num_tokens": 326937677.0, "reward": 1.0151524186134337, "reward_std": 0.1302838146686554, "rewards/accuracy_reward": 0.6858506917953491, "rewards/brier_reward": 0.8010693430900574, "rewards/confidence_uniqueness_reward": 0.9333727955818176, "rewards/format_reward": 0.9902777671813965, "rewards/frontier_coverage_0": 0.021950625255703925, "rewards/frontier_coverage_1": 0.021950625255703925, "rewards/frontier_coverage_10": 0.022064855322241783, "rewards/frontier_coverage_15": 0.022844681143760683, "rewards/frontier_coverage_20": 0.03186333496123552, "rewards/frontier_coverage_25": 0.11219749450683594, "rewards/frontier_coverage_5": 0.021950625255703925, "signal/accuracy_reward/centered_abs_mean": 0.16527235209941865, "signal/accuracy_reward/group_std_mean": 0.21440712809562684, "signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08263617604970933, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08263617604970933, "signal/advantage_abs_mean": 0.09583060741424561, "signal/advantage_pre_scale_abs_mean": 0.09583060741424561, "signal/advantage_pre_scale_std": 0.1677115947008133, "signal/advantage_std": 0.1677115947008133, "signal/brier_reward/centered_abs_mean": 0.1361823335289955, "signal/brier_reward/group_std_mean": 0.17782002985477446, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013618233613669872, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013618233613669872, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039950243383646014, "signal/confidence_uniqueness_reward/group_std_mean": 0.06207837164402008, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003995024506002665, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003995024506002665, "signal/format_reward/centered_abs_mean": 0.01697048614732921, "signal/format_reward/group_std_mean": 0.03419107310473919, "signal/format_reward/group_zero_std_frac": 0.850000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008485243073664606, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008485243073664606, "signal/frontier_coverage_0/centered_abs_mean": 0.1078558087348938, "signal/frontier_coverage_0/group_std_mean": 0.15047508776187896, "signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015423380769789218, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015423380769789218, "signal/frontier_coverage_1/centered_abs_mean": 0.1078558087348938, "signal/frontier_coverage_1/group_std_mean": 0.15047508776187896, "signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015423380769789218, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015423380769789218, "signal/frontier_coverage_10/centered_abs_mean": 0.10575756281614304, "signal/frontier_coverage_10/group_std_mean": 0.1477883592247963, "signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015123330289497972, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015123330289497972, "signal/frontier_coverage_15/centered_abs_mean": 0.09201570004224777, "signal/frontier_coverage_15/group_std_mean": 0.12966947257518768, "signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013158244779333471, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013158244779333471, "signal/frontier_coverage_20/centered_abs_mean": 0.05600855126976967, "signal/frontier_coverage_20/group_std_mean": 0.07790684998035431, "signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008009222452528775, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008009222452528775, "signal/frontier_coverage_25/centered_abs_mean": 0.08503240048885345, "signal/frontier_coverage_25/group_std_mean": 0.1091775730252266, "signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012159633450210094, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012159633450210094, "signal/frontier_coverage_5/centered_abs_mean": 0.1078558087348938, "signal/frontier_coverage_5/group_std_mean": 0.15047508776187896, "signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015423380769789218, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015423380769789218, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.1497119816911291, "eval_calibration/batch_distribution_entropy": 0.7968346178805156, "eval_calibration/buffer_distribution_entropy": 0.8407002187440905, "eval_calibration/confidence_entropy": 0.4125144127540233, "eval_calibration/coverage@0%": 0.20950940860215053, "eval_calibration/coverage@1%": 0.20950940860215053, "eval_calibration/coverage@10%": 0.43934811827956993, "eval_calibration/coverage@15%": 0.5816532258064516, "eval_calibration/coverage@20%": 0.6876680107526881, "eval_calibration/coverage@25%": 0.8776881720430106, "eval_calibration/coverage@30%": 0.9786626344086021, "eval_calibration/coverage@5%": 0.2824260752688172, "eval_calibration/ece": 0.16998011964569013, "eval_calibration/mean_confidence": 0.7282188050854926, "eval_completions/clipped_ratio": 0.008680555555555561, "eval_completions/max_length": 2416.5, "eval_completions/max_terminated_length": 2416.5, "eval_completions/mean_length": 695.3529561360677, "eval_completions/mean_terminated_length": 701.4133707682291, "eval_completions/min_length": 54.666666666666664, "eval_completions/min_terminated_length": 227.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 326937677.0, "eval_reward": 1.0095117688179016, "eval_reward_std": 0.24901040395100912, "eval_rewards/accuracy_reward": 0.6796875099341074, "eval_rewards/brier_reward": 0.8043731153011322, "eval_rewards/confidence_uniqueness_reward": 0.8918871482213339, "eval_rewards/format_reward": 0.9913194477558136, "eval_rewards/frontier_coverage_0": 0.034088116294393934, "eval_rewards/frontier_coverage_1": 0.034088116294393934, "eval_rewards/frontier_coverage_10": 0.03365040601541599, "eval_rewards/frontier_coverage_15": 0.033414963788042464, "eval_rewards/frontier_coverage_20": 0.037281897539893784, "eval_rewards/frontier_coverage_25": 0.0998396414021651, "eval_rewards/frontier_coverage_5": 0.034088116294393934, "eval_runtime": 190.7746, "eval_samples_per_second": 5.242, "eval_signal/accuracy_reward/centered_abs_mean": 0.4189995676279068, "eval_signal/accuracy_reward/group_std_mean": 0.4633843054374059, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2094997838139534, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2094997838139534, "eval_signal/advantage_abs_mean": 0.2170354425907135, "eval_signal/advantage_pre_scale_abs_mean": 0.2170354425907135, "eval_signal/advantage_pre_scale_std": 0.24843567858139673, "eval_signal/advantage_std": 0.24843567858139673, "eval_signal/brier_reward/centered_abs_mean": 0.2193461755911509, "eval_signal/brier_reward/group_std_mean": 0.27922573685646057, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02193461824208498, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02193461824208498, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04991401235262553, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07574755760530631, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0049914012585456176, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0049914012585456176, "eval_signal/format_reward/centered_abs_mean": 0.016493055348594982, "eval_signal/format_reward/group_std_mean": 0.04259948432445526, "eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.008246527674297491, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.008246527674297491, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.1894952729344368, "eval_signal/frontier_coverage_0/group_std_mean": 0.30272159973780316, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027097822166979313, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027097822166979313, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1894952729344368, "eval_signal/frontier_coverage_1/group_std_mean": 0.30272159973780316, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027097822166979313, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027097822166979313, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.18484538545211157, "eval_signal/frontier_coverage_10/group_std_mean": 0.29638702670733136, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026432890444993973, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026432890444993973, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.15993489821751913, "eval_signal/frontier_coverage_15/group_std_mean": 0.26100187251965207, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022870690639441213, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022870690639441213, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.08387432868282, "eval_signal/frontier_coverage_20/group_std_mean": 0.1353093981742859, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011994028852010767, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011994028852010767, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.13570485015710196, "eval_signal/frontier_coverage_25/group_std_mean": 0.16785304248332977, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019405794446356595, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019405794446356595, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.1894952729344368, "eval_signal/frontier_coverage_5/group_std_mean": 0.30272159973780316, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027097822166979313, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027097822166979313, "eval_steps_per_second": 0.031, "step": 150 }, { "calibration/aurc": 0.17029982830875182, "calibration/batch_distribution_entropy": 0.828323867142758, "calibration/buffer_distribution_entropy": 0.8372604232456323, "calibration/confidence_entropy": 0.40143344705114015, "calibration/coverage@0%": 0.002617801047120419, "calibration/coverage@1%": 0.002617801047120419, "calibration/coverage@10%": 0.5048941513727101, "calibration/coverage@15%": 0.5596638265431797, "calibration/coverage@20%": 0.6364728575365114, "calibration/coverage@25%": 0.7473739322127307, "calibration/coverage@30%": 0.8836594103058694, "calibration/coverage@5%": 0.002617801047120419, "calibration/ece": 0.13330443000274134, "calibration/mean_confidence": 0.7272123755133635, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008940972222222232, "completions/max_length": 3450.4, "completions/max_terminated_length": 3450.4, "completions/mean_length": 683.6056518554688, "completions/mean_terminated_length": 689.88115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.3719953500581243, "grad_norm": 0.00048128137132152915, "learning_rate": 1.5963855421686747e-06, "loss": -0.0059, "num_tokens": 337920526.0, "reward": 1.0457221508026122, "reward_std": 0.1301838055253029, "rewards/accuracy_reward": 0.7413194417953491, "rewards/brier_reward": 0.8202741265296936, "rewards/confidence_uniqueness_reward": 0.942044448852539, "rewards/format_reward": 0.9910590171813964, "rewards/frontier_coverage_0": 0.0016106660943478346, "rewards/frontier_coverage_1": 0.0016106660943478346, "rewards/frontier_coverage_10": 0.002487003430724144, "rewards/frontier_coverage_15": 0.008382186014205217, "rewards/frontier_coverage_20": 0.040102506056427956, "rewards/frontier_coverage_25": 0.17503876686096193, "rewards/frontier_coverage_5": 0.0016106660943478346, "signal/accuracy_reward/centered_abs_mean": 0.16593966782093048, "signal/accuracy_reward/group_std_mean": 0.21918415725231172, "signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08296983391046524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08296983391046524, "signal/advantage_abs_mean": 0.09460719525814057, "signal/advantage_pre_scale_abs_mean": 0.09460719525814057, "signal/advantage_pre_scale_std": 0.1647391140460968, "signal/advantage_std": 0.1647391140460968, "signal/brier_reward/centered_abs_mean": 0.13260589838027953, "signal/brier_reward/group_std_mean": 0.17412539422512055, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013260589353740216, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013260589353740216, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033828570321202275, "signal/confidence_uniqueness_reward/group_std_mean": 0.05308753773570061, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003382857143878937, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003382857143878937, "signal/format_reward/centered_abs_mean": 0.015771484561264516, "signal/format_reward/group_std_mean": 0.03148765973746777, "signal/format_reward/group_zero_std_frac": 0.8638889074325562, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007885742280632258, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007885742280632258, "signal/frontier_coverage_0/centered_abs_mean": 0.12180711925029755, "signal/frontier_coverage_0/group_std_mean": 0.16962958872318268, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017418418079614638, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017418418079614638, "signal/frontier_coverage_1/centered_abs_mean": 0.12180711925029755, "signal/frontier_coverage_1/group_std_mean": 0.16962958872318268, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017418418079614638, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017418418079614638, "signal/frontier_coverage_10/centered_abs_mean": 0.11921639740467072, "signal/frontier_coverage_10/group_std_mean": 0.16621364057064056, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017047945875674486, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017047945875674486, "signal/frontier_coverage_15/centered_abs_mean": 0.0924990564584732, "signal/frontier_coverage_15/group_std_mean": 0.13051227778196334, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001322736474685371, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001322736474685371, "signal/frontier_coverage_20/centered_abs_mean": 0.05612751841545105, "signal/frontier_coverage_20/group_std_mean": 0.07590975016355514, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008026235154829919, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008026235154829919, "signal/frontier_coverage_25/centered_abs_mean": 0.11158772855997086, "signal/frontier_coverage_25/group_std_mean": 0.14441257119178771, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015957045601680876, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015957045601680876, "signal/frontier_coverage_5/centered_abs_mean": 0.12180711925029755, "signal/frontier_coverage_5/group_std_mean": 0.16962958872318268, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017418418079614638, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017418418079614638, "step": 155 }, { "calibration/aurc": 0.13742324374109022, "calibration/batch_distribution_entropy": 0.8046436261756036, "calibration/buffer_distribution_entropy": 0.8454471239403271, "calibration/confidence_entropy": 0.4209601948297349, "calibration/coverage@0%": 0.030325589005235597, "calibration/coverage@1%": 0.030325589005235597, "calibration/coverage@10%": 0.6808016148896961, "calibration/coverage@15%": 0.7603269147084422, "calibration/coverage@20%": 0.781201044386423, "calibration/coverage@25%": 0.8, "calibration/coverage@30%": 0.8410526315789474, "calibration/coverage@5%": 0.3801644545744061, "calibration/ece": 0.12313236262405054, "calibration/mean_confidence": 0.7447349905237983, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010069444444444442, "completions/max_length": 3256.0, "completions/max_terminated_length": 3256.0, "completions/mean_length": 697.6481811523438, "completions/mean_terminated_length": 704.7627807617188, "completions/min_length": 0.0, "completions/min_terminated_length": 187.6, "epoch": 0.38399520005999926, "grad_norm": 0.00042031033081002533, "learning_rate": 1.4457831325301204e-06, "loss": -0.0081, "num_tokens": 349044729.0, "reward": 1.011973214149475, "reward_std": 0.1254075601696968, "rewards/accuracy_reward": 0.6768229246139527, "rewards/brier_reward": 0.7973306894302368, "rewards/confidence_uniqueness_reward": 0.9424768567085267, "rewards/format_reward": 0.98984375, "rewards/frontier_coverage_0": 0.020558654330670834, "rewards/frontier_coverage_1": 0.020558654330670834, "rewards/frontier_coverage_10": 0.020932418294250965, "rewards/frontier_coverage_15": 0.022469326481223108, "rewards/frontier_coverage_20": 0.0480774313211441, "rewards/frontier_coverage_25": 0.17265710532665252, "rewards/frontier_coverage_5": 0.020558654330670834, "signal/accuracy_reward/centered_abs_mean": 0.15957573652267457, "signal/accuracy_reward/group_std_mean": 0.20743002891540527, "signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07978786826133728, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07978786826133728, "signal/advantage_abs_mean": 0.09231876432895661, "signal/advantage_pre_scale_abs_mean": 0.09231876432895661, "signal/advantage_pre_scale_std": 0.16247815191745757, "signal/advantage_std": 0.16247815191745757, "signal/brier_reward/centered_abs_mean": 0.13798875659704207, "signal/brier_reward/group_std_mean": 0.17662995755672456, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013798876665532589, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013798876665532589, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033293415978550914, "signal/confidence_uniqueness_reward/group_std_mean": 0.05347738191485405, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003329341718927026, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003329341718927026, "signal/format_reward/centered_abs_mean": 0.01769205741584301, "signal/format_reward/group_std_mean": 0.03484956584870815, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008846028707921505, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008846028707921505, "signal/frontier_coverage_0/centered_abs_mean": 0.13067993819713591, "signal/frontier_coverage_0/group_std_mean": 0.17672376036643983, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018687231000512837, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018687231000512837, "signal/frontier_coverage_1/centered_abs_mean": 0.13067993819713591, "signal/frontier_coverage_1/group_std_mean": 0.17672376036643983, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018687231000512837, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018687231000512837, "signal/frontier_coverage_10/centered_abs_mean": 0.1259875252842903, "signal/frontier_coverage_10/group_std_mean": 0.17063040137290955, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018016215413808822, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018016215413808822, "signal/frontier_coverage_15/centered_abs_mean": 0.09146946370601654, "signal/frontier_coverage_15/group_std_mean": 0.12508394569158554, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001308013335801661, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001308013335801661, "signal/frontier_coverage_20/centered_abs_mean": 0.058338577300310133, "signal/frontier_coverage_20/group_std_mean": 0.07590894401073456, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008342416607774794, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008342416607774794, "signal/frontier_coverage_25/centered_abs_mean": 0.11929452270269394, "signal/frontier_coverage_25/group_std_mean": 0.152734637260437, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001705911778844893, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001705911778844893, "signal/frontier_coverage_5/centered_abs_mean": 0.13067993819713591, "signal/frontier_coverage_5/group_std_mean": 0.17672376036643983, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018687231000512837, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018687231000512837, "step": 160 }, { "calibration/aurc": 0.1656603853248881, "calibration/batch_distribution_entropy": 0.8566291969090212, "calibration/buffer_distribution_entropy": 0.8535527032072778, "calibration/confidence_entropy": 0.40443817859718545, "calibration/coverage@0%": 0.017381097120307547, "calibration/coverage@1%": 0.017381097120307547, "calibration/coverage@10%": 0.49995407558530747, "calibration/coverage@15%": 0.5949152537180693, "calibration/coverage@20%": 0.6571557394509561, "calibration/coverage@25%": 0.7211049372044049, "calibration/coverage@30%": 0.8517150160293824, "calibration/coverage@5%": 0.057801044626869226, "calibration/ece": 0.11454567795901449, "calibration/mean_confidence": 0.667435459092505, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013107638888888884, "completions/max_length": 3570.0, "completions/max_terminated_length": 3570.0, "completions/mean_length": 726.2890747070312, "completions/mean_terminated_length": 736.1373657226562, "completions/min_length": 0.0, "completions/min_terminated_length": 189.6, "epoch": 0.39599505006187424, "grad_norm": 0.00045935352682136, "learning_rate": 1.2951807228915664e-06, "loss": -0.0097, "num_tokens": 360550651.0, "reward": 1.0085692167282105, "reward_std": 0.12360656410455703, "rewards/accuracy_reward": 0.6678819537162781, "rewards/brier_reward": 0.8069667339324951, "rewards/confidence_uniqueness_reward": 0.9387310028076172, "rewards/format_reward": 0.9868923664093018, "rewards/frontier_coverage_0": 0.04505334049463272, "rewards/frontier_coverage_1": 0.04505334049463272, "rewards/frontier_coverage_10": 0.044467170163989066, "rewards/frontier_coverage_15": 0.041498401761054994, "rewards/frontier_coverage_20": 0.06552209258079529, "rewards/frontier_coverage_25": 0.17571614384651185, "rewards/frontier_coverage_5": 0.045087074488401414, "signal/accuracy_reward/centered_abs_mean": 0.14957682192325591, "signal/accuracy_reward/group_std_mean": 0.20039042532444, "signal/accuracy_reward/group_zero_std_frac": 0.4194444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07478841096162796, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07478841096162796, "signal/advantage_abs_mean": 0.09047110676765442, "signal/advantage_pre_scale_abs_mean": 0.09047110676765442, "signal/advantage_pre_scale_std": 0.16116170585155487, "signal/advantage_std": 0.16116170585155487, "signal/brier_reward/centered_abs_mean": 0.13711402416229249, "signal/brier_reward/group_std_mean": 0.17915517389774321, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013711402378976344, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013711402378976344, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03691897690296173, "signal/confidence_uniqueness_reward/group_std_mean": 0.05648680925369263, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036918976344168185, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036918976344168185, "signal/format_reward/centered_abs_mean": 0.020296223647892474, "signal/format_reward/group_std_mean": 0.03649218082427978, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010148111823946237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010148111823946237, "signal/frontier_coverage_0/centered_abs_mean": 0.13668433278799058, "signal/frontier_coverage_0/group_std_mean": 0.1877914160490036, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019545859657227995, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019545859657227995, "signal/frontier_coverage_1/centered_abs_mean": 0.13668433278799058, "signal/frontier_coverage_1/group_std_mean": 0.1877914160490036, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019545859657227995, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019545859657227995, "signal/frontier_coverage_10/centered_abs_mean": 0.13133783787488937, "signal/frontier_coverage_10/group_std_mean": 0.18079141676425933, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018781311810016632, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018781311810016632, "signal/frontier_coverage_15/centered_abs_mean": 0.08820350021123886, "signal/frontier_coverage_15/group_std_mean": 0.12286703139543534, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012613100348971783, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012613100348971783, "signal/frontier_coverage_20/centered_abs_mean": 0.062378589808940885, "signal/frontier_coverage_20/group_std_mean": 0.0826146811246872, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008920137654058636, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008920137654058636, "signal/frontier_coverage_25/centered_abs_mean": 0.11272307336330414, "signal/frontier_coverage_25/group_std_mean": 0.14559331238269807, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016119398642331362, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016119398642331362, "signal/frontier_coverage_5/centered_abs_mean": 0.1365072175860405, "signal/frontier_coverage_5/group_std_mean": 0.1875326693058014, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019520531874150037, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019520531874150037, "step": 165 }, { "calibration/aurc": 0.13592004663092033, "calibration/batch_distribution_entropy": 0.7486099576692926, "calibration/buffer_distribution_entropy": 0.8564281533478454, "calibration/confidence_entropy": 0.37029655417641594, "calibration/coverage@0%": 0.033480513444424695, "calibration/coverage@1%": 0.07117684852295873, "calibration/coverage@10%": 0.4261042923170245, "calibration/coverage@15%": 0.6691992052004629, "calibration/coverage@20%": 0.7751633169909933, "calibration/coverage@25%": 0.8759031317778053, "calibration/coverage@30%": 0.9725213090748339, "calibration/coverage@5%": 0.1496657690046888, "calibration/ece": 0.09613408015602516, "calibration/mean_confidence": 0.7586426281695957, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009548611111111138, "completions/max_length": 3430.4, "completions/max_terminated_length": 3430.4, "completions/mean_length": 703.7692016601562, "completions/mean_terminated_length": 710.5459350585937, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.4079949000637492, "grad_norm": 0.0004403712518978864, "learning_rate": 1.1445783132530121e-06, "loss": -0.0087, "num_tokens": 371747256.0, "reward": 1.0362069845199584, "reward_std": 0.12289563715457916, "rewards/accuracy_reward": 0.722569465637207, "rewards/brier_reward": 0.8146629929542542, "rewards/confidence_uniqueness_reward": 0.9312249541282653, "rewards/format_reward": 0.9904513955116272, "rewards/frontier_coverage_0": 0.015381347015500068, "rewards/frontier_coverage_1": 0.015381347015500068, "rewards/frontier_coverage_10": 0.016636411473155022, "rewards/frontier_coverage_15": 0.02498224622104317, "rewards/frontier_coverage_20": 0.07068880349397659, "rewards/frontier_coverage_25": 0.19831772446632384, "rewards/frontier_coverage_5": 0.01580010838806629, "signal/accuracy_reward/centered_abs_mean": 0.1472873270511627, "signal/accuracy_reward/group_std_mean": 0.19956836700439454, "signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07364366352558135, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07364366352558135, "signal/advantage_abs_mean": 0.08684393763542175, "signal/advantage_pre_scale_abs_mean": 0.08684393763542175, "signal/advantage_pre_scale_std": 0.1598696678876877, "signal/advantage_std": 0.1598696678876877, "signal/brier_reward/centered_abs_mean": 0.13169292807579042, "signal/brier_reward/group_std_mean": 0.17282358705997466, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013169292360544205, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013169292360544205, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039322879165410995, "signal/confidence_uniqueness_reward/group_std_mean": 0.06056636646389961, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003932288242504, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003932288242504, "signal/format_reward/centered_abs_mean": 0.01654730923473835, "signal/format_reward/group_std_mean": 0.033677156642079355, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008273654617369175, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008273654617369175, "signal/frontier_coverage_0/centered_abs_mean": 0.11836729645729065, "signal/frontier_coverage_0/group_std_mean": 0.16220209896564483, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016926524229347705, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016926524229347705, "signal/frontier_coverage_1/centered_abs_mean": 0.11836729645729065, "signal/frontier_coverage_1/group_std_mean": 0.16220209896564483, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016926524229347705, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016926524229347705, "signal/frontier_coverage_10/centered_abs_mean": 0.11148134768009185, "signal/frontier_coverage_10/group_std_mean": 0.15295161604881286, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015941831981763244, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015941831981763244, "signal/frontier_coverage_15/centered_abs_mean": 0.07999386936426163, "signal/frontier_coverage_15/group_std_mean": 0.10980904847383499, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001143912342377007, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001143912342377007, "signal/frontier_coverage_20/centered_abs_mean": 0.0641142837703228, "signal/frontier_coverage_20/group_std_mean": 0.08305520564317703, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009168342221528292, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009168342221528292, "signal/frontier_coverage_25/centered_abs_mean": 0.11506871432065964, "signal/frontier_coverage_25/group_std_mean": 0.14938458502292634, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016454826574772597, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016454826574772597, "signal/frontier_coverage_5/centered_abs_mean": 0.11742766797542573, "signal/frontier_coverage_5/group_std_mean": 0.1609276831150055, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001679215719923377, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001679215719923377, "step": 170 }, { "calibration/aurc": 0.13348262780460535, "calibration/batch_distribution_entropy": 0.8223084451849527, "calibration/buffer_distribution_entropy": 0.8346515524361726, "calibration/confidence_entropy": 0.3854238749390199, "calibration/coverage@0%": 0.036539926699624505, "calibration/coverage@1%": 0.036539926699624505, "calibration/coverage@10%": 0.422825227026967, "calibration/coverage@15%": 0.5584091115953049, "calibration/coverage@20%": 0.8452149741820681, "calibration/coverage@25%": 0.9379946198087186, "calibration/coverage@30%": 0.9821740872231969, "calibration/coverage@5%": 0.13151984499498745, "calibration/ece": 0.0950542819703474, "calibration/mean_confidence": 0.7023832506280211, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012934027777777812, "completions/max_length": 3636.2, "completions/max_terminated_length": 3636.2, "completions/mean_length": 736.4591186523437, "completions/mean_terminated_length": 746.1770385742187, "completions/min_length": 0.0, "completions/min_terminated_length": 198.8, "epoch": 0.4199947500656242, "grad_norm": 0.00045567096094600856, "learning_rate": 9.93975903614458e-07, "loss": -0.0117, "num_tokens": 383339233.0, "reward": 1.0289367198944093, "reward_std": 0.1303061842918396, "rewards/accuracy_reward": 0.71171875, "rewards/brier_reward": 0.8070045828819274, "rewards/confidence_uniqueness_reward": 0.9229098796844483, "rewards/format_reward": 0.9870659708976746, "rewards/frontier_coverage_0": 0.01920226626098156, "rewards/frontier_coverage_1": 0.01920226626098156, "rewards/frontier_coverage_10": 0.01852501593530178, "rewards/frontier_coverage_15": 0.029568823985755444, "rewards/frontier_coverage_20": 0.10787947475910187, "rewards/frontier_coverage_25": 0.2446742206811905, "rewards/frontier_coverage_5": 0.019195317476987838, "signal/accuracy_reward/centered_abs_mean": 0.1561903178691864, "signal/accuracy_reward/group_std_mean": 0.2148987740278244, "signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0780951589345932, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0780951589345932, "signal/advantage_abs_mean": 0.0919778436422348, "signal/advantage_pre_scale_abs_mean": 0.0919778436422348, "signal/advantage_pre_scale_std": 0.16701272428035735, "signal/advantage_std": 0.16701272428035735, "signal/brier_reward/centered_abs_mean": 0.14022985696792603, "signal/brier_reward/group_std_mean": 0.18417258262634278, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014022985659539699, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014022985659539699, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04660597518086433, "signal/confidence_uniqueness_reward/group_std_mean": 0.06790307313203811, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046605975832790135, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046605975832790135, "signal/format_reward/centered_abs_mean": 0.02120768204331398, "signal/format_reward/group_std_mean": 0.03753828890621662, "signal/format_reward/group_zero_std_frac": 0.8499999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01060384102165699, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01060384102165699, "signal/frontier_coverage_0/centered_abs_mean": 0.13295696824789047, "signal/frontier_coverage_0/group_std_mean": 0.18069115579128264, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019012847449630498, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019012847449630498, "signal/frontier_coverage_1/centered_abs_mean": 0.13295696824789047, "signal/frontier_coverage_1/group_std_mean": 0.18069115579128264, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019012847449630498, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019012847449630498, "signal/frontier_coverage_10/centered_abs_mean": 0.11907797455787658, "signal/frontier_coverage_10/group_std_mean": 0.16228229701519012, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017028149915859104, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017028149915859104, "signal/frontier_coverage_15/centered_abs_mean": 0.07870914041996002, "signal/frontier_coverage_15/group_std_mean": 0.10632596611976623, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00112554068909958, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00112554068909958, "signal/frontier_coverage_20/centered_abs_mean": 0.08249068260192871, "signal/frontier_coverage_20/group_std_mean": 0.10877174586057663, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011796167120337487, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011796167120337487, "signal/frontier_coverage_25/centered_abs_mean": 0.1382613003253937, "signal/frontier_coverage_25/group_std_mean": 0.1832427829504013, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019771367078647017, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019771367078647017, "signal/frontier_coverage_5/centered_abs_mean": 0.13113310188055038, "signal/frontier_coverage_5/group_std_mean": 0.17831478118896485, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018752032425254582, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018752032425254582, "step": 175 }, { "calibration/aurc": 0.09345007181896194, "calibration/batch_distribution_entropy": 0.801623303579988, "calibration/buffer_distribution_entropy": 0.8262189453353539, "calibration/confidence_entropy": 0.38592001304418866, "calibration/coverage@0%": 0.08496741084676991, "calibration/coverage@1%": 0.14393717568439587, "calibration/coverage@10%": 0.6195064116269997, "calibration/coverage@15%": 0.8078410920640862, "calibration/coverage@20%": 0.9067183277342291, "calibration/coverage@25%": 0.9640873460246361, "calibration/coverage@30%": 0.9936842105263158, "calibration/coverage@5%": 0.3787979947574124, "calibration/ece": 0.09400609019464161, "calibration/mean_confidence": 0.7386594045732192, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011631944444444441, "completions/max_length": 3725.4, "completions/max_terminated_length": 3725.4, "completions/mean_length": 705.6876831054688, "completions/mean_terminated_length": 713.9794677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 216.8, "epoch": 0.4319946000674992, "grad_norm": 0.0004313621611800045, "learning_rate": 8.433734939759036e-07, "loss": -0.0093, "num_tokens": 394568723.0, "reward": 1.0281787872314454, "reward_std": 0.12547616958618163, "rewards/accuracy_reward": 0.7125000119209289, "rewards/brier_reward": 0.7898874640464782, "rewards/confidence_uniqueness_reward": 0.9186937093734742, "rewards/format_reward": 0.9881944537162781, "rewards/frontier_coverage_0": 0.010317787062376738, "rewards/frontier_coverage_1": 0.010276203881949187, "rewards/frontier_coverage_10": 0.013230977579951286, "rewards/frontier_coverage_15": 0.0318543815985322, "rewards/frontier_coverage_20": 0.12444168329238892, "rewards/frontier_coverage_25": 0.2868518948554993, "rewards/frontier_coverage_5": 0.010678381472826005, "signal/accuracy_reward/centered_abs_mean": 0.15108506977558137, "signal/accuracy_reward/group_std_mean": 0.1995271176099777, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07554253488779068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07554253488779068, "signal/advantage_abs_mean": 0.09149419367313386, "signal/advantage_pre_scale_abs_mean": 0.09149419367313386, "signal/advantage_pre_scale_std": 0.16792958378791809, "signal/advantage_std": 0.16792958378791809, "signal/brier_reward/centered_abs_mean": 0.140881010890007, "signal/brier_reward/group_std_mean": 0.1828139305114746, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01408810093998909, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01408810093998909, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04758927077054977, "signal/confidence_uniqueness_reward/group_std_mean": 0.06954466402530671, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004758927039802074, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004758927039802074, "signal/format_reward/centered_abs_mean": 0.019726562313735485, "signal/format_reward/group_std_mean": 0.037197813764214514, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009863281156867743, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009863281156867743, "signal/frontier_coverage_0/centered_abs_mean": 0.12390959560871125, "signal/frontier_coverage_0/group_std_mean": 0.17109252214431764, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017719071358442307, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017719071358442307, "signal/frontier_coverage_1/centered_abs_mean": 0.12380760312080383, "signal/frontier_coverage_1/group_std_mean": 0.17095208466053008, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017704485915601253, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017704485915601253, "signal/frontier_coverage_10/centered_abs_mean": 0.10980342477560043, "signal/frontier_coverage_10/group_std_mean": 0.1521240144968033, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015701889526098967, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015701889526098967, "signal/frontier_coverage_15/centered_abs_mean": 0.0799461305141449, "signal/frontier_coverage_15/group_std_mean": 0.10864663273096084, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011432296945713461, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011432296945713461, "signal/frontier_coverage_20/centered_abs_mean": 0.08835995942354202, "signal/frontier_coverage_20/group_std_mean": 0.11567166298627854, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001263547409325838, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001263547409325838, "signal/frontier_coverage_25/centered_abs_mean": 0.16603110134601592, "signal/frontier_coverage_25/group_std_mean": 0.21807546317577362, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023742446210235357, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023742446210235357, "signal/frontier_coverage_5/centered_abs_mean": 0.1223075494170189, "signal/frontier_coverage_5/group_std_mean": 0.16897510588169098, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017489979742094874, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017489979742094874, "step": 180 }, { "calibration/aurc": 0.17122914709216733, "calibration/batch_distribution_entropy": 0.7880253868471856, "calibration/buffer_distribution_entropy": 0.8186282369699118, "calibration/confidence_entropy": 0.3642710142144591, "calibration/coverage@0%": 0.018777247401564758, "calibration/coverage@1%": 0.018777247401564758, "calibration/coverage@10%": 0.1579079193038335, "calibration/coverage@15%": 0.5548855949538294, "calibration/coverage@20%": 0.8632928767984499, "calibration/coverage@25%": 0.9036374379904082, "calibration/coverage@30%": 0.9350785340314136, "calibration/coverage@5%": 0.018777247401564758, "calibration/ece": 0.14703350637447332, "calibration/mean_confidence": 0.7192671705590277, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014843750000000022, "completions/max_length": 3571.8, "completions/max_terminated_length": 3571.8, "completions/mean_length": 706.883251953125, "completions/mean_terminated_length": 717.6452026367188, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.44399445006937416, "grad_norm": 0.00048535055248066783, "learning_rate": 6.927710843373495e-07, "loss": -0.0105, "num_tokens": 405802066.0, "reward": 1.014847993850708, "reward_std": 0.13151782006025314, "rewards/accuracy_reward": 0.686718761920929, "rewards/brier_reward": 0.7976317048072815, "rewards/confidence_uniqueness_reward": 0.922605574131012, "rewards/format_reward": 0.9850694537162781, "rewards/frontier_coverage_0": 0.026550618838518857, "rewards/frontier_coverage_1": 0.026589373406022788, "rewards/frontier_coverage_10": 0.027571763657033444, "rewards/frontier_coverage_15": 0.03620800599455833, "rewards/frontier_coverage_20": 0.08736573904752731, "rewards/frontier_coverage_25": 0.25345793068408967, "rewards/frontier_coverage_5": 0.026884720474481583, "signal/accuracy_reward/centered_abs_mean": 0.1610948324203491, "signal/accuracy_reward/group_std_mean": 0.21088041365146637, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08054741621017455, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08054741621017455, "signal/advantage_abs_mean": 0.09806140363216401, "signal/advantage_pre_scale_abs_mean": 0.09806140363216401, "signal/advantage_pre_scale_std": 0.16944925785064696, "signal/advantage_std": 0.16944925785064696, "signal/brier_reward/centered_abs_mean": 0.14560845494270325, "signal/brier_reward/group_std_mean": 0.18695748448371888, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014560846239328384, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014560846239328384, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.045430511236190796, "signal/confidence_uniqueness_reward/group_std_mean": 0.06660099476575851, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00454305112361908, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00454305112361908, "signal/format_reward/centered_abs_mean": 0.0214952252805233, "signal/format_reward/group_std_mean": 0.037768884748220447, "signal/format_reward/group_zero_std_frac": 0.850000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01074761264026165, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01074761264026165, "signal/frontier_coverage_0/centered_abs_mean": 0.13311404585838318, "signal/frontier_coverage_0/group_std_mean": 0.18202302753925323, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019035307923331857, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019035307923331857, "signal/frontier_coverage_1/centered_abs_mean": 0.1330111652612686, "signal/frontier_coverage_1/group_std_mean": 0.18189111053943635, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019020596519112587, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019020596519112587, "signal/frontier_coverage_10/centered_abs_mean": 0.12245990186929703, "signal/frontier_coverage_10/group_std_mean": 0.16796686351299286, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001751176593825221, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001751176593825221, "signal/frontier_coverage_15/centered_abs_mean": 0.09790500402450561, "signal/frontier_coverage_15/group_std_mean": 0.13461994379758835, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014000415336340665, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014000415336340665, "signal/frontier_coverage_20/centered_abs_mean": 0.07675344049930573, "signal/frontier_coverage_20/group_std_mean": 0.10184175372123719, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010975741781294346, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010975741781294346, "signal/frontier_coverage_25/centered_abs_mean": 0.1587800681591034, "signal/frontier_coverage_25/group_std_mean": 0.20560413897037505, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022705549374222754, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022705549374222754, "signal/frontier_coverage_5/centered_abs_mean": 0.1313490241765976, "signal/frontier_coverage_5/group_std_mean": 0.17971158623695374, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018782909493893385, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018782909493893385, "step": 185 }, { "calibration/aurc": 0.18193512910285387, "calibration/batch_distribution_entropy": 0.7778199175476095, "calibration/buffer_distribution_entropy": 0.8242070841531243, "calibration/confidence_entropy": 0.3678612367693591, "calibration/coverage@0%": 0.05865979043198364, "calibration/coverage@1%": 0.05865979043198364, "calibration/coverage@10%": 0.34553174257808716, "calibration/coverage@15%": 0.38030753968253966, "calibration/coverage@20%": 0.586945316731242, "calibration/coverage@25%": 0.854896653543307, "calibration/coverage@30%": 0.9696645341207348, "calibration/coverage@5%": 0.2798431520853192, "calibration/ece": 0.12763212133180607, "calibration/mean_confidence": 0.7513209126914786, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00824652777777779, "completions/max_length": 3541.6, "completions/max_terminated_length": 3541.6, "completions/mean_length": 699.0154541015625, "completions/mean_terminated_length": 704.8328979492187, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.45599430007124914, "grad_norm": 0.00043432877282612026, "learning_rate": 5.421686746987952e-07, "loss": -0.0066, "num_tokens": 416937668.0, "reward": 1.0384333848953247, "reward_std": 0.12415469735860825, "rewards/accuracy_reward": 0.7216145873069764, "rewards/brier_reward": 0.8176455974578858, "rewards/confidence_uniqueness_reward": 0.9341374635696411, "rewards/format_reward": 0.9917534708976745, "rewards/frontier_coverage_0": 0.02207240234129131, "rewards/frontier_coverage_1": 0.022145295469090342, "rewards/frontier_coverage_10": 0.023324301373213528, "rewards/frontier_coverage_15": 0.0292608555406332, "rewards/frontier_coverage_20": 0.07275687083601952, "rewards/frontier_coverage_25": 0.2676252216100693, "rewards/frontier_coverage_5": 0.0223290272988379, "signal/accuracy_reward/centered_abs_mean": 0.16108398735523224, "signal/accuracy_reward/group_std_mean": 0.2116878628730774, "signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08054199367761612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08054199367761612, "signal/advantage_abs_mean": 0.08960044384002686, "signal/advantage_pre_scale_abs_mean": 0.08960044384002686, "signal/advantage_pre_scale_std": 0.16030249297618865, "signal/advantage_std": 0.16030249297618865, "signal/brier_reward/centered_abs_mean": 0.13036527633666992, "signal/brier_reward/group_std_mean": 0.17200060486793517, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013036527484655381, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013036527484655381, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0354267667979002, "signal/confidence_uniqueness_reward/group_std_mean": 0.05462343618273735, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003542676754295826, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003542676754295826, "signal/format_reward/centered_abs_mean": 0.01468641497194767, "signal/format_reward/group_std_mean": 0.02957034520804882, "signal/format_reward/group_zero_std_frac": 0.8722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007343207485973835, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007343207485973835, "signal/frontier_coverage_0/centered_abs_mean": 0.1278734177350998, "signal/frontier_coverage_0/group_std_mean": 0.17466306388378144, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018285899190232159, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018285899190232159, "signal/frontier_coverage_1/centered_abs_mean": 0.12777684330940248, "signal/frontier_coverage_1/group_std_mean": 0.1745421200990677, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018272089073434472, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018272089073434472, "signal/frontier_coverage_10/centered_abs_mean": 0.12328650057315826, "signal/frontier_coverage_10/group_std_mean": 0.16869595050811767, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017629968700930477, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017629968700930477, "signal/frontier_coverage_15/centered_abs_mean": 0.10532844662666321, "signal/frontier_coverage_15/group_std_mean": 0.14432147443294524, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015061968471854926, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015061968471854926, "signal/frontier_coverage_20/centered_abs_mean": 0.07010861709713936, "signal/frontier_coverage_20/group_std_mean": 0.09235068708658219, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010025532450526954, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010025532450526954, "signal/frontier_coverage_25/centered_abs_mean": 0.1480533003807068, "signal/frontier_coverage_25/group_std_mean": 0.19292950630187988, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002117162151262164, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002117162151262164, "signal/frontier_coverage_5/centered_abs_mean": 0.12679695785045625, "signal/frontier_coverage_5/group_std_mean": 0.17328327000141144, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018131964607164264, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018131964607164264, "step": 190 }, { "calibration/aurc": 0.19111161916549027, "calibration/batch_distribution_entropy": 0.8201703152347986, "calibration/buffer_distribution_entropy": 0.8106126533134101, "calibration/confidence_entropy": 0.37700403449513026, "calibration/coverage@0%": 0.015711122047244095, "calibration/coverage@1%": 0.015711122047244095, "calibration/coverage@10%": 0.27973864449795016, "calibration/coverage@15%": 0.5170787035530683, "calibration/coverage@20%": 0.5962166628706536, "calibration/coverage@25%": 0.6621170432853951, "calibration/coverage@30%": 0.7699052081299754, "calibration/coverage@5%": 0.11544865485564304, "calibration/ece": 0.14145817553412607, "calibration/mean_confidence": 0.6937342965939054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012673611111111094, "completions/max_length": 3786.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 717.1263916015625, "completions/mean_terminated_length": 726.4796875, "completions/min_length": 0.0, "completions/min_terminated_length": 180.2, "epoch": 0.46799415007312406, "grad_norm": 0.0004493010346777737, "learning_rate": 3.91566265060241e-07, "loss": -0.0096, "num_tokens": 428279828.0, "reward": 1.008443033695221, "reward_std": 0.12740874141454697, "rewards/accuracy_reward": 0.6736111044883728, "rewards/brier_reward": 0.7872533559799194, "rewards/confidence_uniqueness_reward": 0.9333649158477784, "rewards/format_reward": 0.9872395873069764, "rewards/frontier_coverage_0": 0.027697153389453888, "rewards/frontier_coverage_1": 0.027658072859048845, "rewards/frontier_coverage_10": 0.027347087673842908, "rewards/frontier_coverage_15": 0.030936553701758386, "rewards/frontier_coverage_20": 0.06847289353609085, "rewards/frontier_coverage_25": 0.20654830634593963, "rewards/frontier_coverage_5": 0.027833018451929092, "signal/accuracy_reward/centered_abs_mean": 0.1546983540058136, "signal/accuracy_reward/group_std_mean": 0.2061179220676422, "signal/accuracy_reward/group_zero_std_frac": 0.40555556416511535, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0773491770029068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0773491770029068, "signal/advantage_abs_mean": 0.09210169464349746, "signal/advantage_pre_scale_abs_mean": 0.09210169464349746, "signal/advantage_pre_scale_std": 0.16345611214637756, "signal/advantage_std": 0.16345611214637756, "signal/brier_reward/centered_abs_mean": 0.14198019355535507, "signal/brier_reward/group_std_mean": 0.18473336696624756, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014198019355535507, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014198019355535507, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03743585646152496, "signal/confidence_uniqueness_reward/group_std_mean": 0.058041921257972716, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037435856182128193, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037435856182128193, "signal/format_reward/centered_abs_mean": 0.019276259280741215, "signal/format_reward/group_std_mean": 0.036431630700826646, "signal/format_reward/group_zero_std_frac": 0.8472222447395324, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009638129640370608, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009638129640370608, "signal/frontier_coverage_0/centered_abs_mean": 0.12945400625467302, "signal/frontier_coverage_0/group_std_mean": 0.1761310279369354, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001851192256435752, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001851192256435752, "signal/frontier_coverage_1/centered_abs_mean": 0.12929727286100387, "signal/frontier_coverage_1/group_std_mean": 0.17592544853687286, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018489510286599398, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018489510286599398, "signal/frontier_coverage_10/centered_abs_mean": 0.12513509392738342, "signal/frontier_coverage_10/group_std_mean": 0.17050479352474213, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001789431762881577, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001789431762881577, "signal/frontier_coverage_15/centered_abs_mean": 0.10048199743032456, "signal/frontier_coverage_15/group_std_mean": 0.13747784048318862, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014368925243616105, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014368925243616105, "signal/frontier_coverage_20/centered_abs_mean": 0.07295912206172943, "signal/frontier_coverage_20/group_std_mean": 0.09612232595682144, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010433154529891908, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010433154529891908, "signal/frontier_coverage_25/centered_abs_mean": 0.13768279552459717, "signal/frontier_coverage_25/group_std_mean": 0.18044605255126953, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001968864002265036, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001968864002265036, "signal/frontier_coverage_5/centered_abs_mean": 0.12782656103372575, "signal/frontier_coverage_5/group_std_mean": 0.17402253448963165, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018279198091477155, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018279198091477155, "step": 195 }, { "calibration/aurc": 0.17856991685454757, "calibration/batch_distribution_entropy": 0.7682419787958594, "calibration/buffer_distribution_entropy": 0.8208414222154016, "calibration/confidence_entropy": 0.3823009379147786, "calibration/coverage@0%": 0.012575064242624123, "calibration/coverage@1%": 0.012575064242624123, "calibration/coverage@10%": 0.29729537286758967, "calibration/coverage@15%": 0.43410561899657835, "calibration/coverage@20%": 0.6063885338939963, "calibration/coverage@25%": 0.8951074475065617, "calibration/coverage@30%": 0.9317585301837269, "calibration/coverage@5%": 0.17731190634788727, "calibration/ece": 0.141153362219527, "calibration/mean_confidence": 0.7661149606034827, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00859375, "completions/max_length": 3273.2, "completions/max_terminated_length": 3273.2, "completions/mean_length": 702.9766479492188, "completions/mean_terminated_length": 709.0731323242187, "completions/min_length": 0.0, "completions/min_terminated_length": 182.6, "epoch": 0.47999400007499904, "grad_norm": 0.0004507655103225261, "learning_rate": 2.409638554216868e-07, "loss": -0.006, "num_tokens": 439445927.0, "reward": 1.0263540983200072, "reward_std": 0.12140857428312302, "rewards/accuracy_reward": 0.6982638835906982, "rewards/brier_reward": 0.8136134743690491, "rewards/confidence_uniqueness_reward": 0.9341794133186341, "rewards/format_reward": 0.99140625, "rewards/frontier_coverage_0": 0.032625177130103114, "rewards/frontier_coverage_1": 0.032625177130103114, "rewards/frontier_coverage_10": 0.031621862575411795, "rewards/frontier_coverage_15": 0.029490308091044425, "rewards/frontier_coverage_20": 0.0741084560751915, "rewards/frontier_coverage_25": 0.23827401399612427, "rewards/frontier_coverage_5": 0.032565965130925176, "signal/accuracy_reward/centered_abs_mean": 0.14779730439186095, "signal/accuracy_reward/group_std_mean": 0.19302791357040405, "signal/accuracy_reward/group_zero_std_frac": 0.45555556416511533, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07389865219593048, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07389865219593048, "signal/advantage_abs_mean": 0.08865651488304138, "signal/advantage_pre_scale_abs_mean": 0.08865651488304138, "signal/advantage_pre_scale_std": 0.16045403480529785, "signal/advantage_std": 0.16045403480529785, "signal/brier_reward/centered_abs_mean": 0.1274328500032425, "signal/brier_reward/group_std_mean": 0.1681652307510376, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012743284739553929, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012743284739553929, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03525126874446869, "signal/confidence_uniqueness_reward/group_std_mean": 0.057352755963802335, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035251271445304157, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035251271445304157, "signal/format_reward/centered_abs_mean": 0.015196397714316846, "signal/format_reward/group_std_mean": 0.033493170887231825, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007598198857158423, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007598198857158423, "signal/frontier_coverage_0/centered_abs_mean": 0.10695488750934601, "signal/frontier_coverage_0/group_std_mean": 0.15166882872581483, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015294548822566867, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015294548822566867, "signal/frontier_coverage_1/centered_abs_mean": 0.10695488750934601, "signal/frontier_coverage_1/group_std_mean": 0.15166882872581483, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015294548822566867, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015294548822566867, "signal/frontier_coverage_10/centered_abs_mean": 0.1000509575009346, "signal/frontier_coverage_10/group_std_mean": 0.14240919947624206, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014307287288829683, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014307287288829683, "signal/frontier_coverage_15/centered_abs_mean": 0.08026445358991623, "signal/frontier_coverage_15/group_std_mean": 0.11479192227125168, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011477816849946975, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011477816849946975, "signal/frontier_coverage_20/centered_abs_mean": 0.07273156195878983, "signal/frontier_coverage_20/group_std_mean": 0.09654622375965119, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010400613187812268, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010400613187812268, "signal/frontier_coverage_25/centered_abs_mean": 0.14482411444187165, "signal/frontier_coverage_25/group_std_mean": 0.18901716768741608, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020709848264232277, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020709848264232277, "signal/frontier_coverage_5/centered_abs_mean": 0.10635966509580612, "signal/frontier_coverage_5/group_std_mean": 0.15090845227241517, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015209432225674392, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015209432225674392, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.15612408573344327, "eval_calibration/batch_distribution_entropy": 0.7397058913988753, "eval_calibration/buffer_distribution_entropy": 0.8111514149268445, "eval_calibration/confidence_entropy": 0.3610454566922106, "eval_calibration/coverage@0%": 0.09895833333333333, "eval_calibration/coverage@1%": 0.09895833333333333, "eval_calibration/coverage@10%": 0.4114583333333333, "eval_calibration/coverage@15%": 0.6510416666666666, "eval_calibration/coverage@20%": 0.7864583333333334, "eval_calibration/coverage@25%": 0.9427083333333334, "eval_calibration/coverage@30%": 0.9895833333333334, "eval_calibration/coverage@5%": 0.20833333333333334, "eval_calibration/ece": 0.164064940040337, "eval_calibration/mean_confidence": 0.7508913785324004, "eval_completions/clipped_ratio": 0.006076388888888877, "eval_completions/max_length": 2743.3333333333335, "eval_completions/max_terminated_length": 2743.3333333333335, "eval_completions/mean_length": 718.5382080078125, "eval_completions/mean_terminated_length": 722.8815104166666, "eval_completions/min_length": 98.83333333333333, "eval_completions/min_terminated_length": 235.33333333333334, "eval_loss": 0.0, "eval_num_tokens": 439445927.0, "eval_reward": 1.012143760919571, "eval_reward_std": 0.2549586296081543, "eval_rewards/accuracy_reward": 0.6822916666666666, "eval_rewards/brier_reward": 0.8020952641963959, "eval_rewards/confidence_uniqueness_reward": 0.884552131096522, "eval_rewards/format_reward": 0.9921874900658926, "eval_rewards/frontier_coverage_0": 0.03533175913617015, "eval_rewards/frontier_coverage_1": 0.03533175913617015, "eval_rewards/frontier_coverage_10": 0.035481404474315546, "eval_rewards/frontier_coverage_15": 0.03456710961957773, "eval_rewards/frontier_coverage_20": 0.06474988100429376, "eval_rewards/frontier_coverage_25": 0.19553381452957788, "eval_rewards/frontier_coverage_5": 0.035325445909014284, "eval_runtime": 205.202, "eval_samples_per_second": 4.873, "eval_signal/accuracy_reward/centered_abs_mean": 0.4191623230775197, "eval_signal/accuracy_reward/group_std_mean": 0.4642222821712494, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20958116153875986, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20958116153875986, "eval_signal/advantage_abs_mean": 0.22168858846028647, "eval_signal/advantage_pre_scale_abs_mean": 0.22168858846028647, "eval_signal/advantage_pre_scale_std": 0.2531501104434331, "eval_signal/advantage_std": 0.2531501104434331, "eval_signal/brier_reward/centered_abs_mean": 0.22948966672023138, "eval_signal/brier_reward/group_std_mean": 0.29068108399709064, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022948966672023136, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022948966672023136, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.057275036349892616, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08676877121130626, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00572750383677582, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00572750383677582, "eval_signal/format_reward/centered_abs_mean": 0.015136718284338713, "eval_signal/format_reward/group_std_mean": 0.044194173999130726, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359142169356, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359142169356, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.1817739779750506, "eval_signal/frontier_coverage_0/group_std_mean": 0.3032199541727702, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025993677166601024, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025993677166601024, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.1817739779750506, "eval_signal/frontier_coverage_1/group_std_mean": 0.3032199541727702, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025993677166601024, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025993677166601024, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.17377296338478723, "eval_signal/frontier_coverage_10/group_std_mean": 0.2915558119614919, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002484953341384729, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002484953341384729, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.13976867124438286, "eval_signal/frontier_coverage_15/group_std_mean": 0.2382419357697169, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019986919360235333, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019986919360235333, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.11765346676111221, "eval_signal/frontier_coverage_20/group_std_mean": 0.16934698323408762, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016824445920065045, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016824445920065045, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.27798762917518616, "eval_signal/frontier_coverage_25/group_std_mean": 0.33808498084545135, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00397522277974834, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00397522277974834, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.18090522040923437, "eval_signal/frontier_coverage_5/group_std_mean": 0.30195263028144836, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025869448048373065, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025869448048373065, "eval_steps_per_second": 0.029, "step": 200 }, { "calibration/aurc": 0.20067151651571807, "calibration/batch_distribution_entropy": 0.7387570320570516, "calibration/buffer_distribution_entropy": 0.8004499003116875, "calibration/confidence_entropy": 0.3662937348410689, "calibration/coverage@0%": 0.02244764397905759, "calibration/coverage@1%": 0.02244764397905759, "calibration/coverage@10%": 0.1557400741710297, "calibration/coverage@15%": 0.376529777486911, "calibration/coverage@20%": 0.47280793607054294, "calibration/coverage@25%": 0.8266519702397355, "calibration/coverage@30%": 0.9607329842931938, "calibration/coverage@5%": 0.03338514397905759, "calibration/ece": 0.13317514525521598, "calibration/mean_confidence": 0.7690856718981816, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111116, "completions/max_length": 3502.2, "completions/max_terminated_length": 3502.2, "completions/mean_length": 717.9185913085937, "completions/mean_terminated_length": 724.2613647460937, "completions/min_length": 0.0, "completions/min_terminated_length": 173.8, "epoch": 0.491993850076874, "grad_norm": 0.00037455017445608974, "learning_rate": 9.036144578313253e-08, "loss": -0.0073, "num_tokens": 450782301.0, "reward": 1.0498441219329835, "reward_std": 0.11888199001550674, "rewards/accuracy_reward": 0.7449652791023255, "rewards/brier_reward": 0.8252813696861268, "rewards/confidence_uniqueness_reward": 0.9361446976661683, "rewards/format_reward": 0.9912326455116272, "rewards/frontier_coverage_0": 0.011065579298883677, "rewards/frontier_coverage_1": 0.011065579298883677, "rewards/frontier_coverage_10": 0.013013468869030476, "rewards/frontier_coverage_15": 0.01967374011874199, "rewards/frontier_coverage_20": 0.07497628033161163, "rewards/frontier_coverage_25": 0.25076726377010344, "rewards/frontier_coverage_5": 0.011220467463135719, "signal/accuracy_reward/centered_abs_mean": 0.14809027910232545, "signal/accuracy_reward/group_std_mean": 0.20212058126926422, "signal/accuracy_reward/group_zero_std_frac": 0.402777773141861, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07404513955116272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07404513955116272, "signal/advantage_abs_mean": 0.0827422708272934, "signal/advantage_pre_scale_abs_mean": 0.0827422708272934, "signal/advantage_pre_scale_std": 0.1549478828907013, "signal/advantage_std": 0.1549478828907013, "signal/brier_reward/centered_abs_mean": 0.12346100956201553, "signal/brier_reward/group_std_mean": 0.16328605115413666, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012346101738512517, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012346101738512517, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0347956083714962, "signal/confidence_uniqueness_reward/group_std_mean": 0.05594836547970772, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034795609302818776, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034795609302818776, "signal/format_reward/centered_abs_mean": 0.015869140625, "signal/format_reward/group_std_mean": 0.03326698914170265, "signal/format_reward/group_zero_std_frac": 0.8527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0079345703125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0079345703125, "signal/frontier_coverage_0/centered_abs_mean": 0.12408159524202347, "signal/frontier_coverage_0/group_std_mean": 0.16999780237674714, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017743667354807257, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017743667354807257, "signal/frontier_coverage_1/centered_abs_mean": 0.12408159524202347, "signal/frontier_coverage_1/group_std_mean": 0.16999780237674714, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017743667354807257, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017743667354807257, "signal/frontier_coverage_10/centered_abs_mean": 0.1159680426120758, "signal/frontier_coverage_10/group_std_mean": 0.15904043912887572, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016583429649472236, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016583429649472236, "signal/frontier_coverage_15/centered_abs_mean": 0.09267009794712067, "signal/frontier_coverage_15/group_std_mean": 0.12770854830741882, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013251824537292122, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013251824537292122, "signal/frontier_coverage_20/centered_abs_mean": 0.06762583926320076, "signal/frontier_coverage_20/group_std_mean": 0.09054560959339142, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009670495055615902, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009670495055615902, "signal/frontier_coverage_25/centered_abs_mean": 0.12514513731002808, "signal/frontier_coverage_25/group_std_mean": 0.16600916385650635, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017895755358040334, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017895755358040334, "signal/frontier_coverage_5/centered_abs_mean": 0.12371799796819687, "signal/frontier_coverage_5/group_std_mean": 0.16952943801879883, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001769167324528098, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001769167324528098, "step": 205 }, { "calibration/aurc": 0.09258411805079743, "calibration/batch_distribution_entropy": 0.7909286874200081, "calibration/buffer_distribution_entropy": 0.7829421573350821, "calibration/confidence_entropy": 0.380171748559697, "calibration/coverage@0%": 0.07280701754385965, "calibration/coverage@1%": 0.10964912280701755, "calibration/coverage@10%": 0.6285096107805916, "calibration/coverage@15%": 0.7868216911868257, "calibration/coverage@20%": 0.9004364963028112, "calibration/coverage@25%": 0.9665205474352353, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.4454574210372016, "calibration/ece": 0.06343821279388381, "calibration/mean_confidence": 0.7582262227842415, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00853587962962965, "completions/max_length": 2820.3333333333335, "completions/max_terminated_length": 2820.3333333333335, "completions/mean_length": 715.6140340169271, "completions/mean_terminated_length": 721.9536946614584, "completions/min_length": 0.0, "completions/min_terminated_length": 167.33333333333334, "epoch": 0.49919376007799904, "num_tokens": 457594673.0, "reward": 1.0277764797210693, "reward_std": 0.12234559903542201, "rewards/accuracy_reward": 0.7025462985038757, "rewards/brier_reward": 0.8049926360448202, "rewards/confidence_uniqueness_reward": 0.932228704293569, "rewards/format_reward": 0.9914641181627909, "rewards/frontier_coverage_0": 0.023547140260537464, "rewards/frontier_coverage_1": 0.023547140260537464, "rewards/frontier_coverage_10": 0.023169512239595253, "rewards/frontier_coverage_15": 0.028680586876968544, "rewards/frontier_coverage_20": 0.09770172089338303, "rewards/frontier_coverage_25": 0.2726644178231557, "rewards/frontier_coverage_5": 0.02363560472925504, "signal/accuracy_reward/centered_abs_mean": 0.15646701554457346, "signal/accuracy_reward/group_std_mean": 0.20409129559993744, "signal/accuracy_reward/group_zero_std_frac": 0.4305555721124013, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07823350777228673, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07823350777228673, "signal/advantage_abs_mean": 0.0893336609005928, "signal/advantage_pre_scale_abs_mean": 0.0893336609005928, "signal/advantage_pre_scale_std": 0.1595475971698761, "signal/advantage_std": 0.1595475971698761, "signal/brier_reward/centered_abs_mean": 0.13335002462069193, "signal/brier_reward/group_std_mean": 0.177884042263031, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013335002275804678, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013335002275804678, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03668076234559218, "signal/confidence_uniqueness_reward/group_std_mean": 0.05635303258895874, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036680761259049177, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036680761259049177, "signal/format_reward/centered_abs_mean": 0.0151457612713178, "signal/format_reward/group_std_mean": 0.030251561353604, "signal/format_reward/group_zero_std_frac": 0.8703703681627909, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0075728806356589, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0075728806356589, "signal/frontier_coverage_0/centered_abs_mean": 0.12804659952720007, "signal/frontier_coverage_0/group_std_mean": 0.174424409866333, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018310664454475045, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018310664454475045, "signal/frontier_coverage_1/centered_abs_mean": 0.12804659952720007, "signal/frontier_coverage_1/group_std_mean": 0.174424409866333, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018310664454475045, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018310664454475045, "signal/frontier_coverage_10/centered_abs_mean": 0.11198657502730687, "signal/frontier_coverage_10/group_std_mean": 0.15388049681981406, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016014080417032044, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016014080417032044, "signal/frontier_coverage_15/centered_abs_mean": 0.08891634891430537, "signal/frontier_coverage_15/group_std_mean": 0.12195203453302383, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012715038610622287, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012715038610622287, "signal/frontier_coverage_20/centered_abs_mean": 0.07647461940844853, "signal/frontier_coverage_20/group_std_mean": 0.10173061241706212, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010935871008162696, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010935871008162696, "signal/frontier_coverage_25/centered_abs_mean": 0.1526160587867101, "signal/frontier_coverage_25/group_std_mean": 0.2031193325916926, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002182409632951021, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002182409632951021, "signal/frontier_coverage_5/centered_abs_mean": 0.12730942914883295, "signal/frontier_coverage_5/group_std_mean": 0.17352166771888733, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001820524805225432, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001820524805225432, "step": 208, "total_flos": 0.0, "train_loss": -0.00772521308625493, "train_runtime": 40973.9456, "train_samples_per_second": 0.366, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 457594673, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }