Files
RLCR-v4-ks-uniqueness-buf5k…/trainer_state.json
ModelHub XC 054f0df3a4 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-buf5k-noece-noaurc-cold-math
Source: Original Platform
2026-04-13 01:34:59 +08:00

5402 lines
333 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49919376007799904,
"eval_steps": 50,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.49858621709600043,
"calibration/batch_distribution_entropy": 0.27179949345286947,
"calibration/buffer_distribution_entropy": 0.2826936735263452,
"calibration/confidence_entropy": 0.22057572312827042,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.46227961186189803,
"calibration/mean_confidence": 0.9144884743892769,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02005208333333335,
"completions/max_length": 3998.4,
"completions/max_terminated_length": 3998.4,
"completions/mean_length": 516.7477416992188,
"completions/mean_terminated_length": 527.3296142578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011999850001874977,
"grad_norm": 0.003858975600451231,
"learning_rate": 5.952380952380953e-07,
"loss": 0.0068,
"num_tokens": 9067142.0,
"reward": 0.5035168766975403,
"reward_std": 0.44194251894950864,
"rewards/accuracy_reward": 0.25572916567325593,
"rewards/brier_reward": 0.3094047784805298,
"rewards/confidence_uniqueness_reward": 0.28810680508613584,
"rewards/format_reward": 0.5986111044883728,
"rewards/frontier_coverage_0": 0.16578977052122354,
"rewards/frontier_coverage_1": 0.16578977052122354,
"rewards/frontier_coverage_10": 0.16578977052122354,
"rewards/frontier_coverage_15": 0.16578977052122354,
"rewards/frontier_coverage_20": 0.16578977052122354,
"rewards/frontier_coverage_25": 0.16578977052122354,
"rewards/frontier_coverage_5": 0.16578977052122354,
"signal/accuracy_reward/centered_abs_mean": 0.3037217855453491,
"signal/accuracy_reward/group_std_mean": 0.3625373482704163,
"signal/accuracy_reward/group_zero_std_frac": 0.10555555745959282,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15186089277267456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15186089277267456,
"signal/advantage_abs_mean": 0.383181232213974,
"signal/advantage_pre_scale_abs_mean": 0.383181232213974,
"signal/advantage_pre_scale_std": 0.4454788088798523,
"signal/advantage_std": 0.4454788088798523,
"signal/brier_reward/centered_abs_mean": 0.31622138023376467,
"signal/brier_reward/group_std_mean": 0.36951943635940554,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03162213787436485,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03162213787436485,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.237173992395401,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2888317406177521,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023717399314045907,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023717399314045907,
"signal/format_reward/centered_abs_mean": 0.43920356035232544,
"signal/format_reward/group_std_mean": 0.4745051324367523,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21960178017616272,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.21960178017616272,
"signal/frontier_coverage_0/centered_abs_mean": 0.19068164750933647,
"signal/frontier_coverage_0/group_std_mean": 0.23141059912741185,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_1/centered_abs_mean": 0.19068164750933647,
"signal/frontier_coverage_1/group_std_mean": 0.23141059912741185,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_10/centered_abs_mean": 0.19068164750933647,
"signal/frontier_coverage_10/group_std_mean": 0.23141059912741185,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_15/centered_abs_mean": 0.19068164750933647,
"signal/frontier_coverage_15/group_std_mean": 0.23141059912741185,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_20/centered_abs_mean": 0.19068164750933647,
"signal/frontier_coverage_20/group_std_mean": 0.23141059912741185,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_25/centered_abs_mean": 0.19068164750933647,
"signal/frontier_coverage_25/group_std_mean": 0.23141059912741185,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_5/centered_abs_mean": 0.19068164750933647,
"signal/frontier_coverage_5/group_std_mean": 0.23141059912741185,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027267476194538175,
"step": 5
},
{
"calibration/aurc": 0.5048620587670756,
"calibration/batch_distribution_entropy": 0.23936490089336626,
"calibration/buffer_distribution_entropy": 0.276244326153706,
"calibration/confidence_entropy": 0.21518765364765557,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4767879771080269,
"calibration/mean_confidence": 0.9237536204358829,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01918402777777779,
"completions/max_length": 3910.2,
"completions/max_terminated_length": 3910.2,
"completions/mean_length": 477.1962585449219,
"completions/mean_terminated_length": 486.66339721679685,
"completions/min_length": 0.0,
"completions/min_terminated_length": 21.2,
"epoch": 0.023999700003749954,
"grad_norm": 0.03838086128234863,
"learning_rate": 1.1904761904761906e-06,
"loss": 0.0025,
"num_tokens": 17647163.0,
"reward": 0.5732499718666076,
"reward_std": 0.39466784000396726,
"rewards/accuracy_reward": 0.290625,
"rewards/brier_reward": 0.35456337332725524,
"rewards/confidence_uniqueness_reward": 0.35486308932304383,
"rewards/format_reward": 0.7128472208976746,
"rewards/frontier_coverage_0": 0.00570630207657814,
"rewards/frontier_coverage_1": 0.00570630207657814,
"rewards/frontier_coverage_10": 0.00570630207657814,
"rewards/frontier_coverage_15": 0.00570630207657814,
"rewards/frontier_coverage_20": 0.00570630207657814,
"rewards/frontier_coverage_25": 0.00570630207657814,
"rewards/frontier_coverage_5": 0.00570630207657814,
"signal/accuracy_reward/centered_abs_mean": 0.32489149570465087,
"signal/accuracy_reward/group_std_mean": 0.3819944679737091,
"signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16244574785232543,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16244574785232543,
"signal/advantage_abs_mean": 0.32642056941986086,
"signal/advantage_pre_scale_abs_mean": 0.32642056941986086,
"signal/advantage_pre_scale_std": 0.3967562675476074,
"signal/advantage_std": 0.3967562675476074,
"signal/brier_reward/centered_abs_mean": 0.32057392597198486,
"signal/brier_reward/group_std_mean": 0.3732548654079437,
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032057393342256546,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.032057393342256546,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22309686243534088,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2798730194568634,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022309686988592148,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022309686988592148,
"signal/format_reward/centered_abs_mean": 0.3567274272441864,
"signal/format_reward/group_std_mean": 0.42118590474128725,
"signal/format_reward/group_zero_std_frac": 0.00555555559694767,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1783637136220932,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1783637136220932,
"signal/frontier_coverage_0/centered_abs_mean": 0.015685518644750117,
"signal/frontier_coverage_0/group_std_mean": 0.0333976186811924,
"signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_1/centered_abs_mean": 0.015685518644750117,
"signal/frontier_coverage_1/group_std_mean": 0.0333976186811924,
"signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_10/centered_abs_mean": 0.015685518644750117,
"signal/frontier_coverage_10/group_std_mean": 0.0333976186811924,
"signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_15/centered_abs_mean": 0.015685518644750117,
"signal/frontier_coverage_15/group_std_mean": 0.0333976186811924,
"signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_20/centered_abs_mean": 0.015685518644750117,
"signal/frontier_coverage_20/group_std_mean": 0.0333976186811924,
"signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_25/centered_abs_mean": 0.015685518644750117,
"signal/frontier_coverage_25/group_std_mean": 0.0333976186811924,
"signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_5/centered_abs_mean": 0.015685518644750117,
"signal/frontier_coverage_5/group_std_mean": 0.0333976186811924,
"signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00022430291573982686,
"step": 10
},
{
"calibration/aurc": 0.5651461697219853,
"calibration/batch_distribution_entropy": 0.2858721407601498,
"calibration/buffer_distribution_entropy": 0.26435241204989707,
"calibration/confidence_entropy": 0.23515916341087234,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5243550099379826,
"calibration/mean_confidence": 0.9145028922999036,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011197916666666674,
"completions/max_length": 3814.8,
"completions/max_terminated_length": 3814.8,
"completions/mean_length": 415.96303100585936,
"completions/mean_terminated_length": 420.7157958984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 60.6,
"epoch": 0.03599955000562493,
"grad_norm": 0.001462470623664558,
"learning_rate": 1.7857142857142859e-06,
"loss": -0.0064,
"num_tokens": 25541041.0,
"reward": 0.7139440774917603,
"reward_std": 0.2866878867149353,
"rewards/accuracy_reward": 0.3111111164093018,
"rewards/brier_reward": 0.4146228313446045,
"rewards/confidence_uniqueness_reward": 0.5058956265449523,
"rewards/format_reward": 0.9306423544883728,
"rewards/frontier_coverage_0": 0.010144511703401804,
"rewards/frontier_coverage_1": 0.010144511703401804,
"rewards/frontier_coverage_10": 0.010144511703401804,
"rewards/frontier_coverage_15": 0.010144511703401804,
"rewards/frontier_coverage_20": 0.010144511703401804,
"rewards/frontier_coverage_25": 0.010144511703401804,
"rewards/frontier_coverage_5": 0.010144511703401804,
"signal/accuracy_reward/centered_abs_mean": 0.3240451455116272,
"signal/accuracy_reward/group_std_mean": 0.3799292385578156,
"signal/accuracy_reward/group_zero_std_frac": 0.09444444701075554,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1620225727558136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1620225727558136,
"signal/advantage_abs_mean": 0.22506832480430602,
"signal/advantage_pre_scale_abs_mean": 0.22506832480430602,
"signal/advantage_pre_scale_std": 0.29292616844177244,
"signal/advantage_std": 0.29292616844177244,
"signal/brier_reward/centered_abs_mean": 0.30433117747306826,
"signal/brier_reward/group_std_mean": 0.3543997764587402,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030433119088411332,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.030433119088411332,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17291399836540222,
"signal/confidence_uniqueness_reward/group_std_mean": 0.22697044014930726,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01729139983654022,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01729139983654022,
"signal/format_reward/centered_abs_mean": 0.11800672635436057,
"signal/format_reward/group_std_mean": 0.20241138935089112,
"signal/format_reward/group_zero_std_frac": 0.2583333317190409,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05900336317718029,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.05900336317718029,
"signal/frontier_coverage_0/centered_abs_mean": 0.021325640380382538,
"signal/frontier_coverage_0/group_std_mean": 0.04250783696770668,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_1/centered_abs_mean": 0.021325640380382538,
"signal/frontier_coverage_1/group_std_mean": 0.04250783696770668,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_10/centered_abs_mean": 0.021325640380382538,
"signal/frontier_coverage_10/group_std_mean": 0.04250783696770668,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_15/centered_abs_mean": 0.021325640380382538,
"signal/frontier_coverage_15/group_std_mean": 0.04250783696770668,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_20/centered_abs_mean": 0.021325640380382538,
"signal/frontier_coverage_20/group_std_mean": 0.04250783696770668,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_25/centered_abs_mean": 0.021325640380382538,
"signal/frontier_coverage_25/group_std_mean": 0.04250783696770668,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_5/centered_abs_mean": 0.021325640380382538,
"signal/frontier_coverage_5/group_std_mean": 0.04250783696770668,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00030495663813780995,
"step": 15
},
{
"calibration/aurc": 0.4942660369668168,
"calibration/batch_distribution_entropy": 0.36107236461197073,
"calibration/buffer_distribution_entropy": 0.2866057794747389,
"calibration/confidence_entropy": 0.28890118485400956,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.03717277486910995,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4425994634048266,
"calibration/mean_confidence": 0.8945259513282633,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008767361111111116,
"completions/max_length": 3545.2,
"completions/max_terminated_length": 3545.2,
"completions/mean_length": 423.8654479980469,
"completions/mean_terminated_length": 427.660546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 68.8,
"epoch": 0.04799940000749991,
"grad_norm": 0.0008591993246227503,
"learning_rate": 2.380952380952381e-06,
"loss": -0.0082,
"num_tokens": 33537667.0,
"reward": 0.8044180393218994,
"reward_std": 0.2287308543920517,
"rewards/accuracy_reward": 0.4008680522441864,
"rewards/brier_reward": 0.5150173962116241,
"rewards/confidence_uniqueness_reward": 0.5862200736999512,
"rewards/format_reward": 0.9855034828186036,
"rewards/frontier_coverage_0": 0.011074092797935009,
"rewards/frontier_coverage_1": 0.011074092797935009,
"rewards/frontier_coverage_10": 0.011074092797935009,
"rewards/frontier_coverage_15": 0.011074092797935009,
"rewards/frontier_coverage_20": 0.011074092797935009,
"rewards/frontier_coverage_25": 0.011074092797935009,
"rewards/frontier_coverage_5": 0.011074092797935009,
"signal/accuracy_reward/centered_abs_mean": 0.2977321982383728,
"signal/accuracy_reward/group_std_mean": 0.3633489072322845,
"signal/accuracy_reward/group_zero_std_frac": 0.08333333432674409,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1488660991191864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1488660991191864,
"signal/advantage_abs_mean": 0.18184916377067567,
"signal/advantage_pre_scale_abs_mean": 0.18184916377067567,
"signal/advantage_pre_scale_std": 0.23659501671791078,
"signal/advantage_std": 0.23659501671791078,
"signal/brier_reward/centered_abs_mean": 0.26825318336486814,
"signal/brier_reward/group_std_mean": 0.32477903366088867,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02682531885802746,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02682531885802746,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.16254269182682038,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1992730051279068,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01625426858663559,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01625426858663559,
"signal/format_reward/centered_abs_mean": 0.026285807229578496,
"signal/format_reward/group_std_mean": 0.05511143393814564,
"signal/format_reward/group_zero_std_frac": 0.7555555462837219,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013142903614789248,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013142903614789248,
"signal/frontier_coverage_0/centered_abs_mean": 0.02683491036295891,
"signal/frontier_coverage_0/group_std_mean": 0.04888941571116447,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_1/centered_abs_mean": 0.02683491036295891,
"signal/frontier_coverage_1/group_std_mean": 0.04888941571116447,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_10/centered_abs_mean": 0.02683491036295891,
"signal/frontier_coverage_10/group_std_mean": 0.04888941571116447,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_15/centered_abs_mean": 0.02683491036295891,
"signal/frontier_coverage_15/group_std_mean": 0.04888941571116447,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_20/centered_abs_mean": 0.02683491036295891,
"signal/frontier_coverage_20/group_std_mean": 0.04888941571116447,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_25/centered_abs_mean": 0.02683491036295891,
"signal/frontier_coverage_25/group_std_mean": 0.04888941571116447,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_5/centered_abs_mean": 0.02683491036295891,
"signal/frontier_coverage_5/group_std_mean": 0.04888941571116447,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0003837391850538552,
"step": 20
},
{
"calibration/aurc": 0.39448058005388165,
"calibration/batch_distribution_entropy": 0.4518564097762634,
"calibration/buffer_distribution_entropy": 0.3448626963299991,
"calibration/confidence_entropy": 0.3213623682330117,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.016753926701570682,
"calibration/coverage@20%": 0.019895287958115182,
"calibration/coverage@25%": 0.12198952879581151,
"calibration/coverage@30%": 0.18481675392670155,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3243279868719355,
"calibration/mean_confidence": 0.8755994903434999,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007204861111111116,
"completions/max_length": 3844.8,
"completions/max_terminated_length": 3844.8,
"completions/mean_length": 468.8833435058594,
"completions/mean_terminated_length": 472.284619140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 99.2,
"epoch": 0.05999925000937488,
"grad_norm": 0.0007930905558168888,
"learning_rate": 2.9761904761904763e-06,
"loss": -0.0046,
"num_tokens": 42063651.0,
"reward": 0.8718119025230407,
"reward_std": 0.2136448562145233,
"rewards/accuracy_reward": 0.4987847149372101,
"rewards/brier_reward": 0.6132471799850464,
"rewards/confidence_uniqueness_reward": 0.6505587697029114,
"rewards/format_reward": 0.9907118201255798,
"rewards/frontier_coverage_0": 0.00682337733451277,
"rewards/frontier_coverage_1": 0.00682337733451277,
"rewards/frontier_coverage_10": 0.00682337733451277,
"rewards/frontier_coverage_15": 0.00682337733451277,
"rewards/frontier_coverage_20": 0.00682337733451277,
"rewards/frontier_coverage_25": 0.00682337733451277,
"rewards/frontier_coverage_5": 0.00682337733451277,
"signal/accuracy_reward/centered_abs_mean": 0.28725042939186096,
"signal/accuracy_reward/group_std_mean": 0.35440042018890383,
"signal/accuracy_reward/group_zero_std_frac": 0.08888889104127884,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14362521469593048,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14362521469593048,
"signal/advantage_abs_mean": 0.16881968677043915,
"signal/advantage_pre_scale_abs_mean": 0.16881968677043915,
"signal/advantage_pre_scale_std": 0.22591695785522461,
"signal/advantage_std": 0.22591695785522461,
"signal/brier_reward/centered_abs_mean": 0.23450190126895903,
"signal/brier_reward/group_std_mean": 0.28934155106544496,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02345018908381462,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02345018908381462,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1386233687400818,
"signal/confidence_uniqueness_reward/group_std_mean": 0.16758487224578858,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013862336613237857,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013862336613237857,
"signal/format_reward/centered_abs_mean": 0.017116970755159854,
"signal/format_reward/group_std_mean": 0.038513346761465075,
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008558485377579927,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008558485377579927,
"signal/frontier_coverage_0/centered_abs_mean": 0.030816724896430968,
"signal/frontier_coverage_0/group_std_mean": 0.05202222615480423,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_1/centered_abs_mean": 0.030816724896430968,
"signal/frontier_coverage_1/group_std_mean": 0.05202222615480423,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_10/centered_abs_mean": 0.030816724896430968,
"signal/frontier_coverage_10/group_std_mean": 0.05202222615480423,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_15/centered_abs_mean": 0.030816724896430968,
"signal/frontier_coverage_15/group_std_mean": 0.05202222615480423,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_20/centered_abs_mean": 0.030816724896430968,
"signal/frontier_coverage_20/group_std_mean": 0.05202222615480423,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_25/centered_abs_mean": 0.030816724896430968,
"signal/frontier_coverage_25/group_std_mean": 0.05202222615480423,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_5/centered_abs_mean": 0.030816724896430968,
"signal/frontier_coverage_5/group_std_mean": 0.05202222615480423,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00044067916460335257,
"step": 25
},
{
"calibration/aurc": 0.28599355007968147,
"calibration/batch_distribution_entropy": 0.5729950289638938,
"calibration/buffer_distribution_entropy": 0.4415618909140395,
"calibration/confidence_entropy": 0.39785220307742414,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.013123359580052493,
"calibration/coverage@15%": 0.020935859580052493,
"calibration/coverage@20%": 0.16300624246293538,
"calibration/coverage@25%": 0.3446692071008016,
"calibration/coverage@30%": 0.5385029855643044,
"calibration/coverage@5%": 0.013123359580052493,
"calibration/ece": 0.18942492818507478,
"calibration/mean_confidence": 0.8361121732549478,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013020833333333325,
"completions/max_length": 3944.4,
"completions/max_terminated_length": 3944.4,
"completions/mean_length": 559.3470703125,
"completions/mean_terminated_length": 566.7720703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.8,
"epoch": 0.07199910001124986,
"grad_norm": 0.0005365905235521495,
"learning_rate": 3.5714285714285718e-06,
"loss": -0.0078,
"num_tokens": 51617249.0,
"reward": 0.9077984690666199,
"reward_std": 0.19471972584724426,
"rewards/accuracy_reward": 0.56171875,
"rewards/brier_reward": 0.6795339226722718,
"rewards/confidence_uniqueness_reward": 0.6588276028633118,
"rewards/format_reward": 0.9855034708976745,
"rewards/frontier_coverage_0": 0.003508235071785748,
"rewards/frontier_coverage_1": 0.003508235071785748,
"rewards/frontier_coverage_10": 0.003508235071785748,
"rewards/frontier_coverage_15": 0.003508235071785748,
"rewards/frontier_coverage_20": 0.003508235071785748,
"rewards/frontier_coverage_25": 0.003508235071785748,
"rewards/frontier_coverage_5": 0.003508235071785748,
"signal/accuracy_reward/centered_abs_mean": 0.24979926645755768,
"signal/accuracy_reward/group_std_mean": 0.31267011165618896,
"signal/accuracy_reward/group_zero_std_frac": 0.17500000447034836,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12489963322877884,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12489963322877884,
"signal/advantage_abs_mean": 0.15053375214338302,
"signal/advantage_pre_scale_abs_mean": 0.15053375214338302,
"signal/advantage_pre_scale_std": 0.21564119458198547,
"signal/advantage_std": 0.21564119458198547,
"signal/brier_reward/centered_abs_mean": 0.1923845499753952,
"signal/brier_reward/group_std_mean": 0.24107061624526976,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019238455034792424,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019238455034792424,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15954148322343825,
"signal/confidence_uniqueness_reward/group_std_mean": 0.19074728488922119,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01595414914190769,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01595414914190769,
"signal/format_reward/centered_abs_mean": 0.02523328997194767,
"signal/format_reward/group_std_mean": 0.05074257925152779,
"signal/format_reward/group_zero_std_frac": 0.7777777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012616644985973835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012616644985973835,
"signal/frontier_coverage_0/centered_abs_mean": 0.036959283798933026,
"signal/frontier_coverage_0/group_std_mean": 0.057681336998939514,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_1/centered_abs_mean": 0.036959283798933026,
"signal/frontier_coverage_1/group_std_mean": 0.057681336998939514,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_10/centered_abs_mean": 0.036959283798933026,
"signal/frontier_coverage_10/group_std_mean": 0.057681336998939514,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_15/centered_abs_mean": 0.036959283798933026,
"signal/frontier_coverage_15/group_std_mean": 0.057681336998939514,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_20/centered_abs_mean": 0.036959283798933026,
"signal/frontier_coverage_20/group_std_mean": 0.057681336998939514,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_25/centered_abs_mean": 0.036959283798933026,
"signal/frontier_coverage_25/group_std_mean": 0.057681336998939514,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_5/centered_abs_mean": 0.036959283798933026,
"signal/frontier_coverage_5/group_std_mean": 0.057681336998939514,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005285177612677217,
"step": 30
},
{
"calibration/aurc": 0.2632535671771489,
"calibration/batch_distribution_entropy": 0.6466923551545329,
"calibration/buffer_distribution_entropy": 0.5421351864917888,
"calibration/confidence_entropy": 0.44397516911608326,
"calibration/coverage@0%": 0.015845758641871894,
"calibration/coverage@1%": 0.015845758641871894,
"calibration/coverage@10%": 0.06649707894187398,
"calibration/coverage@15%": 0.1285361788638969,
"calibration/coverage@20%": 0.16434714020955804,
"calibration/coverage@25%": 0.38898227624249804,
"calibration/coverage@30%": 0.8153820641936578,
"calibration/coverage@5%": 0.015845758641871894,
"calibration/ece": 0.14365905416536864,
"calibration/mean_confidence": 0.8021350863697474,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015364583333333348,
"completions/max_length": 4040.8,
"completions/max_terminated_length": 4040.8,
"completions/mean_length": 634.3866333007812,
"completions/mean_terminated_length": 644.3625610351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.6,
"epoch": 0.08399895001312484,
"grad_norm": 0.0005198422586545348,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0098,
"num_tokens": 62002823.0,
"reward": 0.9421573758125306,
"reward_std": 0.17538723051548005,
"rewards/accuracy_reward": 0.6183159828186036,
"rewards/brier_reward": 0.7280090808868408,
"rewards/confidence_uniqueness_reward": 0.6922753095626831,
"rewards/format_reward": 0.9828993201255798,
"rewards/frontier_coverage_0": -0.004782191128470004,
"rewards/frontier_coverage_1": -0.004782191128470004,
"rewards/frontier_coverage_10": -0.004782191128470004,
"rewards/frontier_coverage_15": -0.004782191128470004,
"rewards/frontier_coverage_20": -0.004782191128470004,
"rewards/frontier_coverage_25": -0.004782191128470004,
"rewards/frontier_coverage_5": -0.004782191128470004,
"signal/accuracy_reward/centered_abs_mean": 0.21829969584941863,
"signal/accuracy_reward/group_std_mean": 0.27772454619407655,
"signal/accuracy_reward/group_zero_std_frac": 0.25,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10914984792470932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10914984792470932,
"signal/advantage_abs_mean": 0.13406893461942673,
"signal/advantage_pre_scale_abs_mean": 0.13406893461942673,
"signal/advantage_pre_scale_std": 0.20243431627750397,
"signal/advantage_std": 0.20243431627750397,
"signal/brier_reward/centered_abs_mean": 0.1590863436460495,
"signal/brier_reward/group_std_mean": 0.20434601306915284,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01590863484889269,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01590863484889269,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12657882273197174,
"signal/confidence_uniqueness_reward/group_std_mean": 0.15774886459112167,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01265788208693266,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01265788208693266,
"signal/format_reward/centered_abs_mean": 0.02765299491584301,
"signal/format_reward/group_std_mean": 0.04936157241463661,
"signal/format_reward/group_zero_std_frac": 0.8083333492279052,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013826497457921505,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013826497457921505,
"signal/frontier_coverage_0/centered_abs_mean": 0.046808502078056334,
"signal/frontier_coverage_0/group_std_mean": 0.06622445359826087,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_1/centered_abs_mean": 0.046808502078056334,
"signal/frontier_coverage_1/group_std_mean": 0.06622445359826087,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_10/centered_abs_mean": 0.046808502078056334,
"signal/frontier_coverage_10/group_std_mean": 0.06622445359826087,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_15/centered_abs_mean": 0.046808502078056334,
"signal/frontier_coverage_15/group_std_mean": 0.06622445359826087,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_20/centered_abs_mean": 0.046808502078056334,
"signal/frontier_coverage_20/group_std_mean": 0.06622445359826087,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_25/centered_abs_mean": 0.046808502078056334,
"signal/frontier_coverage_25/group_std_mean": 0.06622445359826087,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_5/centered_abs_mean": 0.046808502078056334,
"signal/frontier_coverage_5/group_std_mean": 0.06622445359826087,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0006693615694530308,
"step": 35
},
{
"calibration/aurc": 0.2676519777027838,
"calibration/batch_distribution_entropy": 0.7054325857041888,
"calibration/buffer_distribution_entropy": 0.6344972967811773,
"calibration/confidence_entropy": 0.4799178907927473,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0599751997795536,
"calibration/coverage@20%": 0.28643587561566747,
"calibration/coverage@25%": 0.4707250139453013,
"calibration/coverage@30%": 0.6129981332198531,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.12413084652207873,
"calibration/mean_confidence": 0.7693312154212686,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013541666666666697,
"completions/max_length": 3869.6,
"completions/max_terminated_length": 3869.6,
"completions/mean_length": 690.2263916015625,
"completions/mean_terminated_length": 699.692431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.0,
"epoch": 0.09599880001499982,
"grad_norm": 0.0004507621633820236,
"learning_rate": 4.761904761904762e-06,
"loss": -0.01,
"num_tokens": 73073751.0,
"reward": 0.9667992949485779,
"reward_std": 0.15924489200115205,
"rewards/accuracy_reward": 0.6505208253860474,
"rewards/brier_reward": 0.754754900932312,
"rewards/confidence_uniqueness_reward": 0.7449337363243103,
"rewards/format_reward": 0.9853298664093018,
"rewards/frontier_coverage_0": -0.011506949504837393,
"rewards/frontier_coverage_1": -0.011506949504837393,
"rewards/frontier_coverage_10": -0.011506949504837393,
"rewards/frontier_coverage_15": -0.011506949504837393,
"rewards/frontier_coverage_20": -0.011012806138023735,
"rewards/frontier_coverage_25": -0.008019896177574991,
"rewards/frontier_coverage_5": -0.011506949504837393,
"signal/accuracy_reward/centered_abs_mean": 0.18986544907093048,
"signal/accuracy_reward/group_std_mean": 0.2536569803953171,
"signal/accuracy_reward/group_zero_std_frac": 0.2777777761220932,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09493272453546524,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09493272453546524,
"signal/advantage_abs_mean": 0.1158403992652893,
"signal/advantage_pre_scale_abs_mean": 0.1158403992652893,
"signal/advantage_pre_scale_std": 0.18982842862606047,
"signal/advantage_std": 0.18982842862606047,
"signal/brier_reward/centered_abs_mean": 0.14530260264873504,
"signal/brier_reward/group_std_mean": 0.18963007628917694,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014530261047184467,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014530261047184467,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08863844275474549,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1167033389210701,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008863845001906156,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008863845001906156,
"signal/format_reward/centered_abs_mean": 0.02526584193110466,
"signal/format_reward/group_std_mean": 0.04484616741538048,
"signal/format_reward/group_zero_std_frac": 0.8277777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01263292096555233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01263292096555233,
"signal/frontier_coverage_0/centered_abs_mean": 0.06523959785699844,
"signal/frontier_coverage_0/group_std_mean": 0.08977894186973571,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_1/centered_abs_mean": 0.06523959785699844,
"signal/frontier_coverage_1/group_std_mean": 0.08977894186973571,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_10/centered_abs_mean": 0.06523959785699844,
"signal/frontier_coverage_10/group_std_mean": 0.08977894186973571,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_15/centered_abs_mean": 0.06523959785699844,
"signal/frontier_coverage_15/group_std_mean": 0.08977894186973571,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_20/centered_abs_mean": 0.06372052878141403,
"signal/frontier_coverage_20/group_std_mean": 0.08785968273878098,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009112035506404937,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009112035506404937,
"signal/frontier_coverage_25/centered_abs_mean": 0.05406465157866478,
"signal/frontier_coverage_25/group_std_mean": 0.07568821161985398,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007731245132163167,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007731245132163167,
"signal/frontier_coverage_5/centered_abs_mean": 0.06523959785699844,
"signal/frontier_coverage_5/group_std_mean": 0.08977894186973571,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009329262189567089,
"step": 40
},
{
"calibration/aurc": 0.23071652949504246,
"calibration/batch_distribution_entropy": 0.7055601707639283,
"calibration/buffer_distribution_entropy": 0.6936541419751034,
"calibration/confidence_entropy": 0.4729729373365611,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0061111111111111106,
"calibration/coverage@15%": 0.06558502555601603,
"calibration/coverage@20%": 0.2877821522309711,
"calibration/coverage@25%": 0.69798687354932,
"calibration/coverage@30%": 0.9863517060367453,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.10290537054204563,
"calibration/mean_confidence": 0.7594771358212933,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014409722222222188,
"completions/max_length": 3817.8,
"completions/max_terminated_length": 3817.8,
"completions/mean_length": 731.4316040039063,
"completions/mean_terminated_length": 742.1711059570313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 232.6,
"epoch": 0.1079986500168748,
"grad_norm": 0.00046814393135719,
"learning_rate": 4.909638554216868e-06,
"loss": -0.0112,
"num_tokens": 84635107.0,
"reward": 0.9677613854408265,
"reward_std": 0.15975097417831421,
"rewards/accuracy_reward": 0.6506076574325561,
"rewards/brier_reward": 0.7581860542297363,
"rewards/confidence_uniqueness_reward": 0.7537668347358704,
"rewards/format_reward": 0.984375,
"rewards/frontier_coverage_0": -0.011279890162404627,
"rewards/frontier_coverage_1": -0.011279890162404627,
"rewards/frontier_coverage_10": -0.011279890162404627,
"rewards/frontier_coverage_15": -0.011279890162404627,
"rewards/frontier_coverage_20": -0.007542286452371627,
"rewards/frontier_coverage_25": -0.0007611054461449385,
"rewards/frontier_coverage_5": -0.011279890162404627,
"signal/accuracy_reward/centered_abs_mean": 0.19601236879825593,
"signal/accuracy_reward/group_std_mean": 0.25668974220752716,
"signal/accuracy_reward/group_zero_std_frac": 0.27777778506278994,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09800618439912796,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09800618439912796,
"signal/advantage_abs_mean": 0.11878292560577393,
"signal/advantage_pre_scale_abs_mean": 0.11878292560577393,
"signal/advantage_pre_scale_std": 0.1898341953754425,
"signal/advantage_std": 0.1898341953754425,
"signal/brier_reward/centered_abs_mean": 0.1457345962524414,
"signal/brier_reward/group_std_mean": 0.19106005132198334,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014573459327220917,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014573459327220917,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10429143160581589,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13065478056669236,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010429143160581588,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010429143160581588,
"signal/format_reward/centered_abs_mean": 0.02532552070915699,
"signal/format_reward/group_std_mean": 0.04548909664154053,
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012662760354578495,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012662760354578495,
"signal/frontier_coverage_0/centered_abs_mean": 0.07666564732789993,
"signal/frontier_coverage_0/group_std_mean": 0.10538152903318405,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_1/centered_abs_mean": 0.07666564732789993,
"signal/frontier_coverage_1/group_std_mean": 0.10538152903318405,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_10/centered_abs_mean": 0.07666564732789993,
"signal/frontier_coverage_10/group_std_mean": 0.10538152903318405,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_15/centered_abs_mean": 0.07666564732789993,
"signal/frontier_coverage_15/group_std_mean": 0.10538152903318405,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_20/centered_abs_mean": 0.0640810675919056,
"signal/frontier_coverage_20/group_std_mean": 0.08941369652748107,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000916359294205904,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000916359294205904,
"signal/frontier_coverage_25/centered_abs_mean": 0.04283785969018936,
"signal/frontier_coverage_25/group_std_mean": 0.062041699141263965,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006125814048573375,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006125814048573375,
"signal/frontier_coverage_5/centered_abs_mean": 0.07666564732789993,
"signal/frontier_coverage_5/group_std_mean": 0.10538152903318405,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001096318766940385,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001096318766940385,
"step": 45
},
{
"calibration/aurc": 0.39586401271300875,
"calibration/batch_distribution_entropy": 0.7615897964351726,
"calibration/buffer_distribution_entropy": 0.7270643773427474,
"calibration/confidence_entropy": 0.5045923336911955,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.012635389036251105,
"calibration/coverage@15%": 0.013696396993810787,
"calibration/coverage@20%": 0.023667813000846843,
"calibration/coverage@25%": 0.03044551748633497,
"calibration/coverage@30%": 0.15728762274949287,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.21160105286348876,
"calibration/mean_confidence": 0.7227722715219795,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00737847222222221,
"completions/max_length": 3473.0,
"completions/max_terminated_length": 3473.0,
"completions/mean_length": 759.92587890625,
"completions/mean_terminated_length": 765.5557373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 232.0,
"epoch": 0.11999850001874976,
"grad_norm": 0.0005435345810838044,
"learning_rate": 4.759036144578314e-06,
"loss": -0.0043,
"num_tokens": 96487053.0,
"reward": 0.9660153031349182,
"reward_std": 0.1462629795074463,
"rewards/accuracy_reward": 0.6291666626930237,
"rewards/brier_reward": 0.7552559852600098,
"rewards/confidence_uniqueness_reward": 0.7993631482124328,
"rewards/format_reward": 0.9925347208976746,
"rewards/frontier_coverage_0": -0.00474322558275162,
"rewards/frontier_coverage_1": -0.00474322558275162,
"rewards/frontier_coverage_10": -0.00474322558275162,
"rewards/frontier_coverage_15": -0.00474322558275162,
"rewards/frontier_coverage_20": -0.0008788239560090005,
"rewards/frontier_coverage_25": 0.0038009291049093006,
"rewards/frontier_coverage_5": -0.00474322558275162,
"signal/accuracy_reward/centered_abs_mean": 0.18924696147441863,
"signal/accuracy_reward/group_std_mean": 0.2494819164276123,
"signal/accuracy_reward/group_zero_std_frac": 0.29166667759418485,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09462348073720932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09462348073720932,
"signal/advantage_abs_mean": 0.10812054872512818,
"signal/advantage_pre_scale_abs_mean": 0.10812054872512818,
"signal/advantage_pre_scale_std": 0.17321833372116088,
"signal/advantage_std": 0.17321833372116088,
"signal/brier_reward/centered_abs_mean": 0.13809363842010497,
"signal/brier_reward/group_std_mean": 0.18017106652259826,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013809364847838878,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013809364847838878,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.079685477912426,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10326177328824997,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00796854794025421,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00796854794025421,
"signal/format_reward/centered_abs_mean": 0.013270399440079928,
"signal/format_reward/group_std_mean": 0.02645639069378376,
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006635199720039964,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006635199720039964,
"signal/frontier_coverage_0/centered_abs_mean": 0.08481028228998184,
"signal/frontier_coverage_0/group_std_mean": 0.11866706758737564,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_1/centered_abs_mean": 0.08481028228998184,
"signal/frontier_coverage_1/group_std_mean": 0.11866706758737564,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_10/centered_abs_mean": 0.08481028228998184,
"signal/frontier_coverage_10/group_std_mean": 0.11866706758737564,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_15/centered_abs_mean": 0.08481028228998184,
"signal/frontier_coverage_15/group_std_mean": 0.11866706758737564,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_20/centered_abs_mean": 0.07179783061146736,
"signal/frontier_coverage_20/group_std_mean": 0.10189384371042251,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001026709016878158,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001026709016878158,
"signal/frontier_coverage_25/centered_abs_mean": 0.046491443365812304,
"signal/frontier_coverage_25/group_std_mean": 0.06868501603603364,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006648276466876268,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006648276466876268,
"signal/frontier_coverage_5/centered_abs_mean": 0.08481028228998184,
"signal/frontier_coverage_5/group_std_mean": 0.11866706758737564,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012127869995310903,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_calibration/aurc": 0.2776295888815277,
"eval_calibration/batch_distribution_entropy": 0.7319698598432759,
"eval_calibration/buffer_distribution_entropy": 0.7449058358345398,
"eval_calibration/confidence_entropy": 0.5090239030799538,
"eval_calibration/coverage@0%": 0.08854166666666667,
"eval_calibration/coverage@1%": 0.08854166666666667,
"eval_calibration/coverage@10%": 0.171875,
"eval_calibration/coverage@15%": 0.2604166666666667,
"eval_calibration/coverage@20%": 0.2708333333333333,
"eval_calibration/coverage@25%": 0.3541666666666667,
"eval_calibration/coverage@30%": 0.6145833333333334,
"eval_calibration/coverage@5%": 0.08854166666666667,
"eval_calibration/ece": 0.165546875,
"eval_calibration/mean_confidence": 0.7360677083333335,
"eval_completions/clipped_ratio": 0.006944444444444438,
"eval_completions/max_length": 2543.1666666666665,
"eval_completions/max_terminated_length": 2543.1666666666665,
"eval_completions/mean_length": 727.5720621744791,
"eval_completions/mean_terminated_length": 732.6880798339844,
"eval_completions/min_length": 45.833333333333336,
"eval_completions/min_terminated_length": 270.3333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 96487053.0,
"eval_reward": 0.968879888455073,
"eval_reward_std": 0.2623755360643069,
"eval_rewards/accuracy_reward": 0.6397569378217062,
"eval_rewards/brier_reward": 0.7657919128735861,
"eval_rewards/confidence_uniqueness_reward": 0.7643194397290548,
"eval_rewards/format_reward": 0.9921875,
"eval_rewards/frontier_coverage_0": -0.002180379558315811,
"eval_rewards/frontier_coverage_1": -0.002180379558315811,
"eval_rewards/frontier_coverage_10": -0.002180379558315811,
"eval_rewards/frontier_coverage_15": -0.001989841514538663,
"eval_rewards/frontier_coverage_20": -0.0009192682919092476,
"eval_rewards/frontier_coverage_25": 0.00439577810660315,
"eval_rewards/frontier_coverage_5": -0.002180379558315811,
"eval_runtime": 204.3039,
"eval_samples_per_second": 4.895,
"eval_signal/accuracy_reward/centered_abs_mean": 0.44677734375,
"eval_signal/accuracy_reward/group_std_mean": 0.47931412359078723,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.223388671875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.223388671875,
"eval_signal/advantage_abs_mean": 0.23765324552853903,
"eval_signal/advantage_pre_scale_abs_mean": 0.23765324552853903,
"eval_signal/advantage_pre_scale_std": 0.2600039492050807,
"eval_signal/advantage_std": 0.2600039492050807,
"eval_signal/brier_reward/centered_abs_mean": 0.21157046655813852,
"eval_signal/brier_reward/group_std_mean": 0.26279614369074505,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021157047400871914,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021157047400871914,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10844459633032481,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13759969919919968,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010844459757208824,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010844459757208824,
"eval_signal/format_reward/centered_abs_mean": 0.015136718284338713,
"eval_signal/format_reward/group_std_mean": 0.044194173999130726,
"eval_signal/format_reward/group_zero_std_frac": 0.7500000298023224,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359142169356,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359142169356,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.10852197060982387,
"eval_signal/frontier_coverage_0/group_std_mean": 0.16503738115231195,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015518641448579729,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015518641448579729,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.10852197060982387,
"eval_signal/frontier_coverage_1/group_std_mean": 0.16503738115231195,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015518641448579729,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015518641448579729,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.10852197060982387,
"eval_signal/frontier_coverage_10/group_std_mean": 0.16503738115231195,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015518641448579729,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015518641448579729,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.10724649329980214,
"eval_signal/frontier_coverage_15/group_std_mean": 0.16348060965538025,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015336248131158452,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015336248131158452,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.09694457550843556,
"eval_signal/frontier_coverage_20/group_std_mean": 0.14916668087244034,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001386307393355916,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001386307393355916,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.06621957384049892,
"eval_signal/frontier_coverage_25/group_std_mean": 0.10746484374006589,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009469399325704823,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009469399325704823,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.10852197060982387,
"eval_signal/frontier_coverage_5/group_std_mean": 0.16503738115231195,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015518641448579729,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015518641448579729,
"eval_steps_per_second": 0.029,
"step": 50
},
{
"calibration/aurc": 0.31649640361405623,
"calibration/batch_distribution_entropy": 0.8044629368041323,
"calibration/buffer_distribution_entropy": 0.7632590824091535,
"calibration/confidence_entropy": 0.527233352329629,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.021354166666666667,
"calibration/coverage@15%": 0.029228182414698163,
"calibration/coverage@20%": 0.10320702099737533,
"calibration/coverage@25%": 0.3614583333333333,
"calibration/coverage@30%": 0.40374331550802134,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.13912997885501577,
"calibration/mean_confidence": 0.704141535873572,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006250000000000022,
"completions/max_length": 3575.8,
"completions/max_terminated_length": 3575.8,
"completions/mean_length": 764.2988037109375,
"completions/mean_terminated_length": 769.2012573242188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 205.4,
"epoch": 0.13199835002062474,
"grad_norm": 0.00045138923451304436,
"learning_rate": 4.60843373493976e-06,
"loss": -0.0045,
"num_tokens": 108372351.0,
"reward": 0.9776891589164733,
"reward_std": 0.14157166481018066,
"rewards/accuracy_reward": 0.6438368082046508,
"rewards/brier_reward": 0.7670759439468384,
"rewards/confidence_uniqueness_reward": 0.8284211039543152,
"rewards/format_reward": 0.9937500119209289,
"rewards/frontier_coverage_0": -0.008810842200182379,
"rewards/frontier_coverage_1": -0.008810842200182379,
"rewards/frontier_coverage_10": -0.008810842200182379,
"rewards/frontier_coverage_15": -0.007830455573275686,
"rewards/frontier_coverage_20": -0.004610662302002311,
"rewards/frontier_coverage_25": 0.0019515341526130214,
"rewards/frontier_coverage_5": -0.008810842200182379,
"signal/accuracy_reward/centered_abs_mean": 0.18646375834941864,
"signal/accuracy_reward/group_std_mean": 0.24433983564376832,
"signal/accuracy_reward/group_zero_std_frac": 0.31666667461395265,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09323187917470932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09323187917470932,
"signal/advantage_abs_mean": 0.10541787147521972,
"signal/advantage_pre_scale_abs_mean": 0.10541787147521972,
"signal/advantage_pre_scale_std": 0.16827026903629302,
"signal/advantage_std": 0.16827026903629302,
"signal/brier_reward/centered_abs_mean": 0.13579574525356292,
"signal/brier_reward/group_std_mean": 0.1776826024055481,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013579574786126614,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013579574786126614,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07823738157749176,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1013486623764038,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007823738548904658,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007823738548904658,
"signal/format_reward/centered_abs_mean": 0.01110026049427688,
"signal/format_reward/group_std_mean": 0.0223752673715353,
"signal/format_reward/group_zero_std_frac": 0.9027777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00555013024713844,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00555013024713844,
"signal/frontier_coverage_0/centered_abs_mean": 0.09882133305072785,
"signal/frontier_coverage_0/group_std_mean": 0.13713002651929856,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014131450327113271,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014131450327113271,
"signal/frontier_coverage_1/centered_abs_mean": 0.09882133305072785,
"signal/frontier_coverage_1/group_std_mean": 0.13713002651929856,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014131450327113271,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014131450327113271,
"signal/frontier_coverage_10/centered_abs_mean": 0.09882133305072785,
"signal/frontier_coverage_10/group_std_mean": 0.13713002651929856,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014131450327113271,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014131450327113271,
"signal/frontier_coverage_15/centered_abs_mean": 0.09499142318964005,
"signal/frontier_coverage_15/group_std_mean": 0.13223906755447387,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013583773747086526,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013583773747086526,
"signal/frontier_coverage_20/centered_abs_mean": 0.08681065887212754,
"signal/frontier_coverage_20/group_std_mean": 0.12124353647232056,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012413924559950829,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012413924559950829,
"signal/frontier_coverage_25/centered_abs_mean": 0.06926739811897278,
"signal/frontier_coverage_25/group_std_mean": 0.09777135252952576,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009905237704515458,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009905237704515458,
"signal/frontier_coverage_5/centered_abs_mean": 0.09882133305072785,
"signal/frontier_coverage_5/group_std_mean": 0.13713002651929856,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014131450327113271,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014131450327113271,
"step": 55
},
{
"calibration/aurc": 0.3382260111628641,
"calibration/batch_distribution_entropy": 0.8498678716551673,
"calibration/buffer_distribution_entropy": 0.8051509048121346,
"calibration/confidence_entropy": 0.5316433536264733,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.042708333333333334,
"calibration/coverage@15%": 0.2808253627968338,
"calibration/coverage@20%": 0.3194285460613261,
"calibration/coverage@25%": 0.36942854606132614,
"calibration/coverage@30%": 0.46338289628488666,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1688721630430574,
"calibration/mean_confidence": 0.6809634581476074,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0057291666666666515,
"completions/max_length": 3641.0,
"completions/max_terminated_length": 3641.0,
"completions/mean_length": 771.4474853515625,
"completions/mean_terminated_length": 775.8955322265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 225.6,
"epoch": 0.14399820002249972,
"grad_norm": 0.0004424219368956983,
"learning_rate": 4.457831325301205e-06,
"loss": -0.0032,
"num_tokens": 120356002.0,
"reward": 0.9673421621322632,
"reward_std": 0.14665516316890717,
"rewards/accuracy_reward": 0.6087673544883728,
"rewards/brier_reward": 0.7633313059806823,
"rewards/confidence_uniqueness_reward": 0.8849505186080933,
"rewards/format_reward": 0.9940104007720947,
"rewards/frontier_coverage_0": 0.010127071291208267,
"rewards/frontier_coverage_1": 0.010127071291208267,
"rewards/frontier_coverage_10": 0.010127071291208267,
"rewards/frontier_coverage_15": 0.010443565156310796,
"rewards/frontier_coverage_20": 0.012819062476046384,
"rewards/frontier_coverage_25": 0.0149055490270257,
"rewards/frontier_coverage_5": 0.010127071291208267,
"signal/accuracy_reward/centered_abs_mean": 0.19885525107383728,
"signal/accuracy_reward/group_std_mean": 0.2586120396852493,
"signal/accuracy_reward/group_zero_std_frac": 0.28333333134651184,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09942762553691864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09942762553691864,
"signal/advantage_abs_mean": 0.10986414402723313,
"signal/advantage_pre_scale_abs_mean": 0.10986414402723313,
"signal/advantage_pre_scale_std": 0.17161572575569153,
"signal/advantage_std": 0.17161572575569153,
"signal/brier_reward/centered_abs_mean": 0.1456875115633011,
"signal/brier_reward/group_std_mean": 0.18925343751907348,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01456875205039978,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01456875205039978,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07028612047433853,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0925293281674385,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007028611935675144,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007028611935675144,
"signal/format_reward/centered_abs_mean": 0.01108398474752903,
"signal/format_reward/group_std_mean": 0.025465189665555953,
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005541992373764515,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005541992373764515,
"signal/frontier_coverage_0/centered_abs_mean": 0.11798207312822342,
"signal/frontier_coverage_0/group_std_mean": 0.1616940289735794,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016871436731889845,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016871436731889845,
"signal/frontier_coverage_1/centered_abs_mean": 0.11798207312822342,
"signal/frontier_coverage_1/group_std_mean": 0.1616940289735794,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016871436731889845,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016871436731889845,
"signal/frontier_coverage_10/centered_abs_mean": 0.11798207312822342,
"signal/frontier_coverage_10/group_std_mean": 0.1616940289735794,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016871436731889845,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016871436731889845,
"signal/frontier_coverage_15/centered_abs_mean": 0.11572056114673615,
"signal/frontier_coverage_15/group_std_mean": 0.15887772738933564,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016548039624467493,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016548039624467493,
"signal/frontier_coverage_20/centered_abs_mean": 0.10319755375385284,
"signal/frontier_coverage_20/group_std_mean": 0.14280655086040497,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014757250202819705,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014757250202819705,
"signal/frontier_coverage_25/centered_abs_mean": 0.08172965943813323,
"signal/frontier_coverage_25/group_std_mean": 0.11435115933418274,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011687340680509805,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011687340680509805,
"signal/frontier_coverage_5/centered_abs_mean": 0.11798207312822342,
"signal/frontier_coverage_5/group_std_mean": 0.1616940289735794,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016871436731889845,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016871436731889845,
"step": 60
},
{
"calibration/aurc": 0.24587685737231602,
"calibration/batch_distribution_entropy": 0.8593260113425367,
"calibration/buffer_distribution_entropy": 0.8419197575160856,
"calibration/confidence_entropy": 0.5353930508126851,
"calibration/coverage@0%": 0.017225576588337684,
"calibration/coverage@1%": 0.017225576588337684,
"calibration/coverage@10%": 0.175674499564839,
"calibration/coverage@15%": 0.4178592798085291,
"calibration/coverage@20%": 0.5347761640557007,
"calibration/coverage@25%": 0.64177545691906,
"calibration/coverage@30%": 0.7441253263707572,
"calibration/coverage@5%": 0.017747769799825935,
"calibration/ece": 0.14085569002805973,
"calibration/mean_confidence": 0.6692040214761524,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002777777777777768,
"completions/max_length": 3182.6,
"completions/max_terminated_length": 3182.6,
"completions/mean_length": 761.7659790039063,
"completions/mean_terminated_length": 763.8692138671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 215.8,
"epoch": 0.1559980500243747,
"grad_norm": 0.0004698596312664449,
"learning_rate": 4.307228915662651e-06,
"loss": 0.0001,
"num_tokens": 132225594.0,
"reward": 0.995530652999878,
"reward_std": 0.13234637379646302,
"rewards/accuracy_reward": 0.64921875,
"rewards/brier_reward": 0.785097849369049,
"rewards/confidence_uniqueness_reward": 0.9373332023620605,
"rewards/format_reward": 0.9971354007720947,
"rewards/frontier_coverage_0": -0.002034256886690855,
"rewards/frontier_coverage_1": -0.002034256886690855,
"rewards/frontier_coverage_10": -0.002034256886690855,
"rewards/frontier_coverage_15": -0.0016045193187892437,
"rewards/frontier_coverage_20": 0.0038384980522096156,
"rewards/frontier_coverage_25": 0.013624860998243093,
"rewards/frontier_coverage_5": -0.002034256886690855,
"signal/accuracy_reward/centered_abs_mean": 0.18282877504825593,
"signal/accuracy_reward/group_std_mean": 0.24428035020828248,
"signal/accuracy_reward/group_zero_std_frac": 0.29722222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09141438752412796,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09141438752412796,
"signal/advantage_abs_mean": 0.09773297160863877,
"signal/advantage_pre_scale_abs_mean": 0.09773297160863877,
"signal/advantage_pre_scale_std": 0.1567333608865738,
"signal/advantage_std": 0.1567333608865738,
"signal/brier_reward/centered_abs_mean": 0.13010090589523315,
"signal/brier_reward/group_std_mean": 0.1674443781375885,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01301009114831686,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01301009114831686,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03651793897151947,
"signal/confidence_uniqueness_reward/group_std_mean": 0.051064802706241606,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003651794046163559,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003651794046163559,
"signal/format_reward/centered_abs_mean": 0.005430772574618459,
"signal/format_reward/group_std_mean": 0.013679004088044167,
"signal/format_reward/group_zero_std_frac": 0.9305555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027153862873092295,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0027153862873092295,
"signal/frontier_coverage_0/centered_abs_mean": 0.12229467630386352,
"signal/frontier_coverage_0/group_std_mean": 0.16521598398685455,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017488139681518077,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017488139681518077,
"signal/frontier_coverage_1/centered_abs_mean": 0.12229467630386352,
"signal/frontier_coverage_1/group_std_mean": 0.16521598398685455,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017488139681518077,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017488139681518077,
"signal/frontier_coverage_10/centered_abs_mean": 0.12229467630386352,
"signal/frontier_coverage_10/group_std_mean": 0.16521598398685455,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017488139681518077,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017488139681518077,
"signal/frontier_coverage_15/centered_abs_mean": 0.12053841352462769,
"signal/frontier_coverage_15/group_std_mean": 0.16303691267967224,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017236994579434394,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017236994579434394,
"signal/frontier_coverage_20/centered_abs_mean": 0.09820731431245804,
"signal/frontier_coverage_20/group_std_mean": 0.13474227339029313,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014043646398931742,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014043646398931742,
"signal/frontier_coverage_25/centered_abs_mean": 0.07332679852843285,
"signal/frontier_coverage_25/group_std_mean": 0.10180476605892182,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010485732345841825,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010485732345841825,
"signal/frontier_coverage_5/centered_abs_mean": 0.12229467630386352,
"signal/frontier_coverage_5/group_std_mean": 0.16521598398685455,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017488139681518077,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017488139681518077,
"step": 65
},
{
"calibration/aurc": 0.26958251359481367,
"calibration/batch_distribution_entropy": 0.8663688799379698,
"calibration/buffer_distribution_entropy": 0.8733795984294593,
"calibration/confidence_entropy": 0.577260486680282,
"calibration/coverage@0%": 0.02308205424394914,
"calibration/coverage@1%": 0.02308205424394914,
"calibration/coverage@10%": 0.12958876708285477,
"calibration/coverage@15%": 0.14900721673999842,
"calibration/coverage@20%": 0.2683685738270198,
"calibration/coverage@25%": 0.3697268444811656,
"calibration/coverage@30%": 0.6402569338436326,
"calibration/coverage@5%": 0.0755754925641591,
"calibration/ece": 0.09930710020716957,
"calibration/mean_confidence": 0.6231611950933489,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005902777777777768,
"completions/max_length": 3517.4,
"completions/max_terminated_length": 3517.4,
"completions/mean_length": 762.48056640625,
"completions/mean_terminated_length": 767.0295288085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 196.6,
"epoch": 0.16799790002624967,
"grad_norm": 0.00045749920536763966,
"learning_rate": 4.156626506024097e-06,
"loss": -0.0059,
"num_tokens": 144087514.0,
"reward": 0.9868767857551575,
"reward_std": 0.1281371980905533,
"rewards/accuracy_reward": 0.637586796283722,
"rewards/brier_reward": 0.7736868143081665,
"rewards/confidence_uniqueness_reward": 0.9424182176589966,
"rewards/format_reward": 0.9940972208976746,
"rewards/frontier_coverage_0": -0.008795747673138976,
"rewards/frontier_coverage_1": -0.008795747673138976,
"rewards/frontier_coverage_10": -0.008733327453956007,
"rewards/frontier_coverage_15": -0.008401900064200163,
"rewards/frontier_coverage_20": -0.0042295768857002255,
"rewards/frontier_coverage_25": 0.007490093156229704,
"rewards/frontier_coverage_5": -0.008795747673138976,
"signal/accuracy_reward/centered_abs_mean": 0.17555881142616273,
"signal/accuracy_reward/group_std_mean": 0.2333855837583542,
"signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08777940571308136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08777940571308136,
"signal/advantage_abs_mean": 0.09379418194293976,
"signal/advantage_pre_scale_abs_mean": 0.09379418194293976,
"signal/advantage_pre_scale_std": 0.1555002361536026,
"signal/advantage_std": 0.1555002361536026,
"signal/brier_reward/centered_abs_mean": 0.12457352876663208,
"signal/brier_reward/group_std_mean": 0.160868501663208,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012457353435456753,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012457353435456753,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.032642674446105954,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04912559166550636,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032642676495015623,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032642676495015623,
"signal/format_reward/centered_abs_mean": 0.010753038246184588,
"signal/format_reward/group_std_mean": 0.02312941402196884,
"signal/format_reward/group_zero_std_frac": 0.8944444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005376519123092294,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005376519123092294,
"signal/frontier_coverage_0/centered_abs_mean": 0.13515576124191284,
"signal/frontier_coverage_0/group_std_mean": 0.17945427298545838,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019327274756506085,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019327274756506085,
"signal/frontier_coverage_1/centered_abs_mean": 0.13515576124191284,
"signal/frontier_coverage_1/group_std_mean": 0.17945427298545838,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019327274756506085,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019327274756506085,
"signal/frontier_coverage_10/centered_abs_mean": 0.134914430975914,
"signal/frontier_coverage_10/group_std_mean": 0.17911535501480103,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019292764598503708,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019292764598503708,
"signal/frontier_coverage_15/centered_abs_mean": 0.131193308532238,
"signal/frontier_coverage_15/group_std_mean": 0.1743150144815445,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018760643433779478,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018760643433779478,
"signal/frontier_coverage_20/centered_abs_mean": 0.10956997573375701,
"signal/frontier_coverage_20/group_std_mean": 0.14680338203907012,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001566850603558123,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001566850603558123,
"signal/frontier_coverage_25/centered_abs_mean": 0.06231881156563759,
"signal/frontier_coverage_25/group_std_mean": 0.08558403998613358,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000891158974263817,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000891158974263817,
"signal/frontier_coverage_5/centered_abs_mean": 0.13515576124191284,
"signal/frontier_coverage_5/group_std_mean": 0.17945427298545838,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019327274756506085,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019327274756506085,
"step": 70
},
{
"calibration/aurc": 0.2563588085190992,
"calibration/batch_distribution_entropy": 0.8382992480473315,
"calibration/buffer_distribution_entropy": 0.8805381439631234,
"calibration/confidence_entropy": 0.5695789505685956,
"calibration/coverage@0%": 0.02257520030390938,
"calibration/coverage@1%": 0.02257520030390938,
"calibration/coverage@10%": 0.1989478432794585,
"calibration/coverage@15%": 0.27763706140350874,
"calibration/coverage@20%": 0.41623903508771926,
"calibration/coverage@25%": 0.586359649122807,
"calibration/coverage@30%": 0.6760910087719298,
"calibration/coverage@5%": 0.027838358198646225,
"calibration/ece": 0.10921236148819072,
"calibration/mean_confidence": 0.6479703220803527,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002951388888888884,
"completions/max_length": 3187.4,
"completions/max_terminated_length": 3187.4,
"completions/mean_length": 755.7357788085938,
"completions/mean_terminated_length": 757.99462890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 218.8,
"epoch": 0.17999775002812465,
"grad_norm": 0.0004290560900699347,
"learning_rate": 4.006024096385543e-06,
"loss": 0.0003,
"num_tokens": 155858486.0,
"reward": 1.017405092716217,
"reward_std": 0.11944967806339264,
"rewards/accuracy_reward": 0.6934895753860474,
"rewards/brier_reward": 0.7949100136756897,
"rewards/confidence_uniqueness_reward": 0.9398416519165039,
"rewards/format_reward": 0.9970486164093018,
"rewards/frontier_coverage_0": -0.023628878220915795,
"rewards/frontier_coverage_1": -0.023628878220915795,
"rewards/frontier_coverage_10": -0.021788668585941195,
"rewards/frontier_coverage_15": -0.014927842747420072,
"rewards/frontier_coverage_20": -0.003723863745108247,
"rewards/frontier_coverage_25": 0.017198705207556488,
"rewards/frontier_coverage_5": -0.023147269897162915,
"signal/accuracy_reward/centered_abs_mean": 0.16501193642616271,
"signal/accuracy_reward/group_std_mean": 0.21951915323734283,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08250596821308136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08250596821308136,
"signal/advantage_abs_mean": 0.08793365359306335,
"signal/advantage_pre_scale_abs_mean": 0.08793365359306335,
"signal/advantage_pre_scale_std": 0.14816934764385223,
"signal/advantage_std": 0.14816934764385223,
"signal/brier_reward/centered_abs_mean": 0.11173765361309052,
"signal/brier_reward/group_std_mean": 0.14379720985889435,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011173765547573567,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011173765547573567,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030797071009874343,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0448210634291172,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030797069426625966,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030797069426625966,
"signal/format_reward/centered_abs_mean": 0.005631510401144624,
"signal/format_reward/group_std_mean": 0.01455035675317049,
"signal/format_reward/group_zero_std_frac": 0.925,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002815755200572312,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002815755200572312,
"signal/frontier_coverage_0/centered_abs_mean": 0.11744404733180999,
"signal/frontier_coverage_0/group_std_mean": 0.15851396322250366,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001679449831135571,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001679449831135571,
"signal/frontier_coverage_1/centered_abs_mean": 0.11744404733180999,
"signal/frontier_coverage_1/group_std_mean": 0.15851396322250366,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001679449831135571,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001679449831135571,
"signal/frontier_coverage_10/centered_abs_mean": 0.11422204971313477,
"signal/frontier_coverage_10/group_std_mean": 0.15434344708919526,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016333752777427436,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016333752777427436,
"signal/frontier_coverage_15/centered_abs_mean": 0.10063839107751846,
"signal/frontier_coverage_15/group_std_mean": 0.13689128160476685,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014391290256753563,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014391290256753563,
"signal/frontier_coverage_20/centered_abs_mean": 0.07208155021071434,
"signal/frontier_coverage_20/group_std_mean": 0.0996133729815483,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001030766183976084,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001030766183976084,
"signal/frontier_coverage_25/centered_abs_mean": 0.043751812726259234,
"signal/frontier_coverage_25/group_std_mean": 0.06023159921169281,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006256509223021567,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006256509223021567,
"signal/frontier_coverage_5/centered_abs_mean": 0.11698432117700577,
"signal/frontier_coverage_5/group_std_mean": 0.15789782702922822,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016728756949305535,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016728756949305535,
"step": 75
},
{
"calibration/aurc": 0.21554772857493187,
"calibration/batch_distribution_entropy": 0.7793047369317647,
"calibration/buffer_distribution_entropy": 0.859575274807294,
"calibration/confidence_entropy": 0.5226812675460125,
"calibration/coverage@0%": 0.012215909090909092,
"calibration/coverage@1%": 0.012215909090909092,
"calibration/coverage@10%": 0.043605169340463455,
"calibration/coverage@15%": 0.41333556149732625,
"calibration/coverage@20%": 0.47428141711229943,
"calibration/coverage@25%": 0.7336229946524064,
"calibration/coverage@30%": 0.7927083333333333,
"calibration/coverage@5%": 0.01756350267379679,
"calibration/ece": 0.11169882524136499,
"calibration/mean_confidence": 0.7130084327986905,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005642361111111116,
"completions/max_length": 3469.4,
"completions/max_terminated_length": 3469.4,
"completions/mean_length": 808.5981689453125,
"completions/mean_terminated_length": 813.23388671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 204.4,
"epoch": 0.19199760002999963,
"grad_norm": 0.0005567002226598561,
"learning_rate": 3.855421686746989e-06,
"loss": -0.0042,
"num_tokens": 168226817.0,
"reward": 0.9941539883613586,
"reward_std": 0.13035476952791214,
"rewards/accuracy_reward": 0.6485243082046509,
"rewards/brier_reward": 0.7861994743347168,
"rewards/confidence_uniqueness_reward": 0.9314009189605713,
"rewards/format_reward": 0.9942708492279053,
"rewards/frontier_coverage_0": 0.005442132381722331,
"rewards/frontier_coverage_1": 0.005442132381722331,
"rewards/frontier_coverage_10": 0.006071169814094901,
"rewards/frontier_coverage_15": 0.007214262872003019,
"rewards/frontier_coverage_20": 0.011351470567751676,
"rewards/frontier_coverage_25": 0.02855553664267063,
"rewards/frontier_coverage_5": 0.005597225157544017,
"signal/accuracy_reward/centered_abs_mean": 0.17549370527267455,
"signal/accuracy_reward/group_std_mean": 0.23448271155357361,
"signal/accuracy_reward/group_zero_std_frac": 0.32222222089767455,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08774685263633727,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08774685263633727,
"signal/advantage_abs_mean": 0.09637740403413772,
"signal/advantage_pre_scale_abs_mean": 0.09637740403413772,
"signal/advantage_pre_scale_std": 0.16058520078659058,
"signal/advantage_std": 0.16058520078659058,
"signal/brier_reward/centered_abs_mean": 0.12150197178125381,
"signal/brier_reward/group_std_mean": 0.15757565200328827,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01215019728988409,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01215019728988409,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038753630965948103,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05312336310744285,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003875363012775779,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003875363012775779,
"signal/format_reward/centered_abs_mean": 0.00966796875,
"signal/format_reward/group_std_mean": 0.018474388308823107,
"signal/format_reward/group_zero_std_frac": 0.9222222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004833984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004833984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.10300857871770859,
"signal/frontier_coverage_0/group_std_mean": 0.14024612605571746,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014730226481333374,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014730226481333374,
"signal/frontier_coverage_1/centered_abs_mean": 0.10300857871770859,
"signal/frontier_coverage_1/group_std_mean": 0.14024612605571746,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014730226481333374,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014730226481333374,
"signal/frontier_coverage_10/centered_abs_mean": 0.0971254363656044,
"signal/frontier_coverage_10/group_std_mean": 0.132836189866066,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001388893718831241,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001388893718831241,
"signal/frontier_coverage_15/centered_abs_mean": 0.08325443416833878,
"signal/frontier_coverage_15/group_std_mean": 0.11486416459083557,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011905384133569896,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011905384133569896,
"signal/frontier_coverage_20/centered_abs_mean": 0.056342567503452304,
"signal/frontier_coverage_20/group_std_mean": 0.07883516997098923,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008056987193413079,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008056987193413079,
"signal/frontier_coverage_25/centered_abs_mean": 0.04240647032856941,
"signal/frontier_coverage_25/group_std_mean": 0.05655211955308914,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006064124754630029,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006064124754630029,
"signal/frontier_coverage_5/centered_abs_mean": 0.10220045298337936,
"signal/frontier_coverage_5/group_std_mean": 0.1392603486776352,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014614664250984788,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014614664250984788,
"step": 80
},
{
"calibration/aurc": 0.22017950153274862,
"calibration/batch_distribution_entropy": 0.8714044348934851,
"calibration/buffer_distribution_entropy": 0.8522211124256589,
"calibration/confidence_entropy": 0.5380838558752095,
"calibration/coverage@0%": 0.036259588016304314,
"calibration/coverage@1%": 0.036259588016304314,
"calibration/coverage@10%": 0.11721307560118954,
"calibration/coverage@15%": 0.3834662270669858,
"calibration/coverage@20%": 0.4646737963842374,
"calibration/coverage@25%": 0.6478996052484998,
"calibration/coverage@30%": 0.8137590066730672,
"calibration/coverage@5%": 0.059398362336180996,
"calibration/ece": 0.11638557739382216,
"calibration/mean_confidence": 0.6332810296217062,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004427083333333348,
"completions/max_length": 3764.4,
"completions/max_terminated_length": 3764.4,
"completions/mean_length": 794.3666748046875,
"completions/mean_terminated_length": 797.9120361328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 225.6,
"epoch": 0.2039974500318746,
"grad_norm": 0.00044087867718189955,
"learning_rate": 3.7048192771084342e-06,
"loss": -0.0022,
"num_tokens": 180465121.0,
"reward": 1.0135318279266357,
"reward_std": 0.13029464483261108,
"rewards/accuracy_reward": 0.6832465171813965,
"rewards/brier_reward": 0.8005435109138489,
"rewards/confidence_uniqueness_reward": 0.9350399494171142,
"rewards/format_reward": 0.9953993082046508,
"rewards/frontier_coverage_0": -0.0016260695294477046,
"rewards/frontier_coverage_1": -0.0016260695294477046,
"rewards/frontier_coverage_10": -0.0008423494873568416,
"rewards/frontier_coverage_15": 0.00238975181709975,
"rewards/frontier_coverage_20": 0.011206477042287588,
"rewards/frontier_coverage_25": 0.0376150730997324,
"rewards/frontier_coverage_5": -0.0016260695294477046,
"signal/accuracy_reward/centered_abs_mean": 0.17951931357383727,
"signal/accuracy_reward/group_std_mean": 0.23702281415462495,
"signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08975965678691863,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08975965678691863,
"signal/advantage_abs_mean": 0.09476174563169479,
"signal/advantage_pre_scale_abs_mean": 0.09476174563169479,
"signal/advantage_pre_scale_std": 0.15883181095123292,
"signal/advantage_std": 0.15883181095123292,
"signal/brier_reward/centered_abs_mean": 0.11919141858816147,
"signal/brier_reward/group_std_mean": 0.1569477528333664,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011919141374528408,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011919141374528408,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034977962449193004,
"signal/confidence_uniqueness_reward/group_std_mean": 0.052396781742572784,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034977963194251062,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034977963194251062,
"signal/format_reward/centered_abs_mean": 0.008707682136446238,
"signal/format_reward/group_std_mean": 0.02160101868212223,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004353841068223119,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004353841068223119,
"signal/frontier_coverage_0/centered_abs_mean": 0.11353013217449189,
"signal/frontier_coverage_0/group_std_mean": 0.15329338610172272,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016234809532761573,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016234809532761573,
"signal/frontier_coverage_1/centered_abs_mean": 0.11353013217449189,
"signal/frontier_coverage_1/group_std_mean": 0.15329338610172272,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016234809532761573,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016234809532761573,
"signal/frontier_coverage_10/centered_abs_mean": 0.11054201275110245,
"signal/frontier_coverage_10/group_std_mean": 0.14934370666742325,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015807508490979672,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015807508490979672,
"signal/frontier_coverage_15/centered_abs_mean": 0.09602530598640442,
"signal/frontier_coverage_15/group_std_mean": 0.13076421320438386,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013731618179008364,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013731618179008364,
"signal/frontier_coverage_20/centered_abs_mean": 0.058435800671577456,
"signal/frontier_coverage_20/group_std_mean": 0.08124178051948547,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008356319856829941,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008356319856829941,
"signal/frontier_coverage_25/centered_abs_mean": 0.045007632672786714,
"signal/frontier_coverage_25/group_std_mean": 0.05981680378317833,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006436091498471797,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006436091498471797,
"signal/frontier_coverage_5/centered_abs_mean": 0.11353013217449189,
"signal/frontier_coverage_5/group_std_mean": 0.15329338610172272,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016234809532761573,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016234809532761573,
"step": 85
},
{
"calibration/aurc": 0.19130533906993302,
"calibration/batch_distribution_entropy": 0.8412225489716348,
"calibration/buffer_distribution_entropy": 0.8626075627761844,
"calibration/confidence_entropy": 0.5053431016428965,
"calibration/coverage@0%": 0.03859342792359489,
"calibration/coverage@1%": 0.03859342792359489,
"calibration/coverage@10%": 0.25211929092498037,
"calibration/coverage@15%": 0.3443653155241082,
"calibration/coverage@20%": 0.4048542125930009,
"calibration/coverage@25%": 0.831518123679618,
"calibration/coverage@30%": 0.9070597960870763,
"calibration/coverage@5%": 0.09598164593926069,
"calibration/ece": 0.0995040290382317,
"calibration/mean_confidence": 0.6870986648020756,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006163194444444442,
"completions/max_length": 3540.4,
"completions/max_terminated_length": 3540.4,
"completions/mean_length": 758.8940307617188,
"completions/mean_terminated_length": 763.611083984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 206.2,
"epoch": 0.2159973000337496,
"grad_norm": 0.0005134688108228147,
"learning_rate": 3.5542168674698798e-06,
"loss": -0.0033,
"num_tokens": 192276252.0,
"reward": 1.0077428221702576,
"reward_std": 0.12586894929409026,
"rewards/accuracy_reward": 0.6733506917953491,
"rewards/brier_reward": 0.7962008833885192,
"rewards/confidence_uniqueness_reward": 0.9313777089118958,
"rewards/format_reward": 0.9938367962837219,
"rewards/frontier_coverage_0": 0.004424169240519404,
"rewards/frontier_coverage_1": 0.004424169240519404,
"rewards/frontier_coverage_10": 0.005338566357386299,
"rewards/frontier_coverage_15": 0.009187003783881664,
"rewards/frontier_coverage_20": 0.01867530047893524,
"rewards/frontier_coverage_25": 0.05060422196984291,
"rewards/frontier_coverage_5": 0.004631689615052892,
"signal/accuracy_reward/centered_abs_mean": 0.16591254472732545,
"signal/accuracy_reward/group_std_mean": 0.21845885515213012,
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08295627236366272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08295627236366272,
"signal/advantage_abs_mean": 0.09228640496730804,
"signal/advantage_pre_scale_abs_mean": 0.09228640496730804,
"signal/advantage_pre_scale_std": 0.15987550914287568,
"signal/advantage_std": 0.15987550914287568,
"signal/brier_reward/centered_abs_mean": 0.117860808968544,
"signal/brier_reward/group_std_mean": 0.1544253945350647,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011786081641912461,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011786081641912461,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03850234746932983,
"signal/confidence_uniqueness_reward/group_std_mean": 0.056425672769546506,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038502346724271774,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038502346724271774,
"signal/format_reward/centered_abs_mean": 0.011311848741024732,
"signal/format_reward/group_std_mean": 0.02502160966396332,
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005655924370512366,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005655924370512366,
"signal/frontier_coverage_0/centered_abs_mean": 0.09827356785535812,
"signal/frontier_coverage_0/group_std_mean": 0.13401113748550414,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001405311981216073,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001405311981216073,
"signal/frontier_coverage_1/centered_abs_mean": 0.09827356785535812,
"signal/frontier_coverage_1/group_std_mean": 0.13401113748550414,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001405311981216073,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001405311981216073,
"signal/frontier_coverage_10/centered_abs_mean": 0.09452640563249588,
"signal/frontier_coverage_10/group_std_mean": 0.12922739684581758,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013517276151105762,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013517276151105762,
"signal/frontier_coverage_15/centered_abs_mean": 0.07233314439654351,
"signal/frontier_coverage_15/group_std_mean": 0.10092607736587525,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010343640227802099,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010343640227802099,
"signal/frontier_coverage_20/centered_abs_mean": 0.049670548737049104,
"signal/frontier_coverage_20/group_std_mean": 0.06895174235105514,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007102888310328126,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007102888310328126,
"signal/frontier_coverage_25/centered_abs_mean": 0.05155856236815452,
"signal/frontier_coverage_25/group_std_mean": 0.067772376537323,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007372874300926924,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007372874300926924,
"signal/frontier_coverage_5/centered_abs_mean": 0.09758316129446029,
"signal/frontier_coverage_5/group_std_mean": 0.13312698602676393,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013954391703009605,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013954391703009605,
"step": 90
},
{
"calibration/aurc": 0.23218282614032365,
"calibration/batch_distribution_entropy": 0.8237960782553888,
"calibration/buffer_distribution_entropy": 0.8804108398849207,
"calibration/confidence_entropy": 0.5243356933254755,
"calibration/coverage@0%": 0.01052649005750661,
"calibration/coverage@1%": 0.01052649005750661,
"calibration/coverage@10%": 0.09966577080811616,
"calibration/coverage@15%": 0.431924512069393,
"calibration/coverage@20%": 0.617088188355147,
"calibration/coverage@25%": 0.6702744892179752,
"calibration/coverage@30%": 0.710721242177679,
"calibration/coverage@5%": 0.03245860494001314,
"calibration/ece": 0.1193284582303358,
"calibration/mean_confidence": 0.6877564386884953,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006163194444444442,
"completions/max_length": 3526.8,
"completions/max_terminated_length": 3526.8,
"completions/mean_length": 766.1975708007812,
"completions/mean_terminated_length": 770.9320678710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 220.8,
"epoch": 0.22799715003562457,
"grad_norm": 0.0004157455696258694,
"learning_rate": 3.4036144578313257e-06,
"loss": -0.004,
"num_tokens": 204194528.0,
"reward": 1.0064563512802125,
"reward_std": 0.12273335456848145,
"rewards/accuracy_reward": 0.6696180582046509,
"rewards/brier_reward": 0.7975351452827454,
"rewards/confidence_uniqueness_reward": 0.9334475994110107,
"rewards/format_reward": 0.9934895753860473,
"rewards/frontier_coverage_0": 0.007734180334955454,
"rewards/frontier_coverage_1": 0.007734180334955454,
"rewards/frontier_coverage_10": 0.008875045739114285,
"rewards/frontier_coverage_15": 0.013548683421686292,
"rewards/frontier_coverage_20": 0.02269660122692585,
"rewards/frontier_coverage_25": 0.05776782408356666,
"rewards/frontier_coverage_5": 0.007814234215766191,
"signal/accuracy_reward/centered_abs_mean": 0.1544704854488373,
"signal/accuracy_reward/group_std_mean": 0.2076838880777359,
"signal/accuracy_reward/group_zero_std_frac": 0.4,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07723524272441865,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07723524272441865,
"signal/advantage_abs_mean": 0.08850989192724228,
"signal/advantage_pre_scale_abs_mean": 0.08850989192724228,
"signal/advantage_pre_scale_std": 0.1567450851202011,
"signal/advantage_std": 0.1567450851202011,
"signal/brier_reward/centered_abs_mean": 0.11523358970880508,
"signal/brier_reward/group_std_mean": 0.15127619802951814,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011523359268903733,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011523359268903733,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036900246143341066,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05565410703420639,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036900244653224946,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036900244653224946,
"signal/format_reward/centered_abs_mean": 0.01131184899713844,
"signal/format_reward/group_std_mean": 0.026015446335077286,
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00565592449856922,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00565592449856922,
"signal/frontier_coverage_0/centered_abs_mean": 0.09024225771427155,
"signal/frontier_coverage_0/group_std_mean": 0.12343428432941436,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012904643081128597,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012904643081128597,
"signal/frontier_coverage_1/centered_abs_mean": 0.09024225771427155,
"signal/frontier_coverage_1/group_std_mean": 0.12343428432941436,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012904643081128597,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012904643081128597,
"signal/frontier_coverage_10/centered_abs_mean": 0.08697677999734879,
"signal/frontier_coverage_10/group_std_mean": 0.1192005679011345,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012437679572030902,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012437679572030902,
"signal/frontier_coverage_15/centered_abs_mean": 0.06753548979759216,
"signal/frontier_coverage_15/group_std_mean": 0.09412883222103119,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009657575283199549,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009657575283199549,
"signal/frontier_coverage_20/centered_abs_mean": 0.047610755264759066,
"signal/frontier_coverage_20/group_std_mean": 0.06612022668123245,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006808338337577879,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006808338337577879,
"signal/frontier_coverage_25/centered_abs_mean": 0.05751822665333748,
"signal/frontier_coverage_25/group_std_mean": 0.07561186105012893,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008225106517784298,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008225106517784298,
"signal/frontier_coverage_5/centered_abs_mean": 0.08994513750076294,
"signal/frontier_coverage_5/group_std_mean": 0.12307111024856568,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012862155679613351,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012862155679613351,
"step": 95
},
{
"calibration/aurc": 0.20759978881743368,
"calibration/batch_distribution_entropy": 0.7872453416290367,
"calibration/buffer_distribution_entropy": 0.851199244324374,
"calibration/confidence_entropy": 0.5093936144746041,
"calibration/coverage@0%": 0.012143117253085606,
"calibration/coverage@1%": 0.012143117253085606,
"calibration/coverage@10%": 0.10598727560385317,
"calibration/coverage@15%": 0.2561893629834925,
"calibration/coverage@20%": 0.49089280471040475,
"calibration/coverage@25%": 0.6995251651659927,
"calibration/coverage@30%": 0.8823003139129817,
"calibration/coverage@5%": 0.031241260489159883,
"calibration/ece": 0.0877942040353775,
"calibration/mean_confidence": 0.7242433693125331,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01328125,
"completions/max_length": 3636.2,
"completions/max_terminated_length": 3636.2,
"completions/mean_length": 782.308251953125,
"completions/mean_terminated_length": 792.8037231445312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 226.6,
"epoch": 0.23999700003749952,
"grad_norm": 0.00045198958832770586,
"learning_rate": 3.2530120481927713e-06,
"loss": -0.0101,
"num_tokens": 216305791.0,
"reward": 1.0077686071395875,
"reward_std": 0.13463030606508256,
"rewards/accuracy_reward": 0.6796875,
"rewards/brier_reward": 0.7993229150772094,
"rewards/confidence_uniqueness_reward": 0.9266997456550599,
"rewards/format_reward": 0.9866319417953491,
"rewards/frontier_coverage_0": 0.009804848302155732,
"rewards/frontier_coverage_1": 0.009804848302155732,
"rewards/frontier_coverage_10": 0.01042446969076991,
"rewards/frontier_coverage_15": 0.012922577001154423,
"rewards/frontier_coverage_20": 0.02126994784921408,
"rewards/frontier_coverage_25": 0.06628896966576577,
"rewards/frontier_coverage_5": 0.009804848302155732,
"signal/accuracy_reward/centered_abs_mean": 0.16714409589767457,
"signal/accuracy_reward/group_std_mean": 0.21892527341842652,
"signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08357204794883728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08357204794883728,
"signal/advantage_abs_mean": 0.09943573027849198,
"signal/advantage_pre_scale_abs_mean": 0.09943573027849198,
"signal/advantage_pre_scale_std": 0.17322509586811066,
"signal/advantage_std": 0.17322509586811066,
"signal/brier_reward/centered_abs_mean": 0.12519195675849915,
"signal/brier_reward/group_std_mean": 0.1623040735721588,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012519196048378945,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012519196048378945,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04277070388197899,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06450802609324455,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004277070425450802,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004277070425450802,
"signal/format_reward/centered_abs_mean": 0.02023654468357563,
"signal/format_reward/group_std_mean": 0.03803690262138844,
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010118272341787814,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010118272341787814,
"signal/frontier_coverage_0/centered_abs_mean": 0.08244038820266723,
"signal/frontier_coverage_0/group_std_mean": 0.11372108608484269,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0011788975214585661,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011788975214585661,
"signal/frontier_coverage_1/centered_abs_mean": 0.08244038820266723,
"signal/frontier_coverage_1/group_std_mean": 0.11372108608484269,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0011788975214585661,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011788975214585661,
"signal/frontier_coverage_10/centered_abs_mean": 0.07991492450237274,
"signal/frontier_coverage_10/group_std_mean": 0.11050502359867095,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0011427834630012511,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011427834630012511,
"signal/frontier_coverage_15/centered_abs_mean": 0.0665148988366127,
"signal/frontier_coverage_15/group_std_mean": 0.09321689903736115,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009511630749329924,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009511630749329924,
"signal/frontier_coverage_20/centered_abs_mean": 0.04453737959265709,
"signal/frontier_coverage_20/group_std_mean": 0.06285597681999207,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006368845235556364,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006368845235556364,
"signal/frontier_coverage_25/centered_abs_mean": 0.060834895074367526,
"signal/frontier_coverage_25/group_std_mean": 0.08100210577249527,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008699389640241861,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008699389640241861,
"signal/frontier_coverage_5/centered_abs_mean": 0.08244038820266723,
"signal/frontier_coverage_5/group_std_mean": 0.11372108608484269,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0011788975214585661,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011788975214585661,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_calibration/aurc": 0.13921226805258638,
"eval_calibration/batch_distribution_entropy": 0.7191935689276043,
"eval_calibration/buffer_distribution_entropy": 0.8323235519691029,
"eval_calibration/confidence_entropy": 0.47122875449565105,
"eval_calibration/coverage@0%": 0.16717069892473116,
"eval_calibration/coverage@1%": 0.16717069892473116,
"eval_calibration/coverage@10%": 0.46639784946236557,
"eval_calibration/coverage@15%": 0.6330645161290323,
"eval_calibration/coverage@20%": 0.837869623655914,
"eval_calibration/coverage@25%": 0.9321236559139785,
"eval_calibration/coverage@30%": 0.9635416666666666,
"eval_calibration/coverage@5%": 0.24529569892473116,
"eval_calibration/ece": 0.15433622692672344,
"eval_calibration/mean_confidence": 0.7643868913528463,
"eval_completions/clipped_ratio": 0.014756944444444456,
"eval_completions/max_length": 2571.1666666666665,
"eval_completions/max_terminated_length": 2571.1666666666665,
"eval_completions/mean_length": 752.1073099772135,
"eval_completions/mean_terminated_length": 763.3598937988281,
"eval_completions/min_length": 104.16666666666667,
"eval_completions/min_terminated_length": 271.8333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 216305791.0,
"eval_reward": 0.9933919807275137,
"eval_reward_std": 0.27725009868542355,
"eval_rewards/accuracy_reward": 0.6710069477558136,
"eval_rewards/brier_reward": 0.7916092475255331,
"eval_rewards/confidence_uniqueness_reward": 0.8643936216831207,
"eval_rewards/format_reward": 0.980902781089147,
"eval_rewards/frontier_coverage_0": 0.011102605417060355,
"eval_rewards/frontier_coverage_1": 0.011102605417060355,
"eval_rewards/frontier_coverage_10": 0.011642855126410723,
"eval_rewards/frontier_coverage_15": 0.012925609441784522,
"eval_rewards/frontier_coverage_20": 0.017780127624670666,
"eval_rewards/frontier_coverage_25": 0.05278685626884302,
"eval_rewards/frontier_coverage_5": 0.011105729693857333,
"eval_runtime": 206.9157,
"eval_samples_per_second": 4.833,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4260525157054265,
"eval_signal/accuracy_reward/group_std_mean": 0.4678276677926381,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21302625785271326,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21302625785271326,
"eval_signal/advantage_abs_mean": 0.2413168102502823,
"eval_signal/advantage_pre_scale_abs_mean": 0.2413168102502823,
"eval_signal/advantage_pre_scale_std": 0.2765214368700981,
"eval_signal/advantage_std": 0.2765214368700981,
"eval_signal/brier_reward/centered_abs_mean": 0.2165469080209732,
"eval_signal/brier_reward/group_std_mean": 0.26921579490105313,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021654691236714523,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021654691236714523,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06999108629922073,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11411779932677746,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006999108707532287,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006999108707532287,
"eval_signal/format_reward/centered_abs_mean": 0.03613281218955914,
"eval_signal/format_reward/group_std_mean": 0.08657800406217575,
"eval_signal/format_reward/group_zero_std_frac": 0.583333338300387,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.01806640609477957,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.01806640609477957,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.08888960257172585,
"eval_signal/frontier_coverage_0/group_std_mean": 0.14864219104250273,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012711213203147054,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012711213203147054,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.08888960257172585,
"eval_signal/frontier_coverage_1/group_std_mean": 0.14864219104250273,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012711213203147054,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012711213203147054,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.08579947799444199,
"eval_signal/frontier_coverage_10/group_std_mean": 0.14465376734733582,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012269325282735128,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012269325282735128,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.06808544136583805,
"eval_signal/frontier_coverage_15/group_std_mean": 0.12035164733727773,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009736218586719284,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009736218586719284,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.04719839679698149,
"eval_signal/frontier_coverage_20/group_std_mean": 0.08246325453122456,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006749370562223097,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006749370562223097,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09537930289904277,
"eval_signal/frontier_coverage_25/group_std_mean": 0.12517398471633592,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001363924064207822,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001363924064207822,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.08822002758582433,
"eval_signal/frontier_coverage_5/group_std_mean": 0.1477730112771193,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012615464123276372,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012615464123276372,
"eval_steps_per_second": 0.029,
"step": 100
},
{
"calibration/aurc": 0.3479992140027637,
"calibration/batch_distribution_entropy": 0.750787770254411,
"calibration/buffer_distribution_entropy": 0.8211954941825012,
"calibration/confidence_entropy": 0.46615226530585224,
"calibration/coverage@0%": 0.019922428534220153,
"calibration/coverage@1%": 0.019922428534220153,
"calibration/coverage@10%": 0.12016627849477383,
"calibration/coverage@15%": 0.1617470000037748,
"calibration/coverage@20%": 0.19590033859661704,
"calibration/coverage@25%": 0.21370138571703592,
"calibration/coverage@30%": 0.3333657334183914,
"calibration/coverage@5%": 0.09898002015725679,
"calibration/ece": 0.17690751887463851,
"calibration/mean_confidence": 0.7663661401788909,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020659722222222232,
"completions/max_length": 3665.8,
"completions/max_terminated_length": 3665.8,
"completions/mean_length": 753.8394897460937,
"completions/mean_terminated_length": 769.899658203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 205.6,
"epoch": 0.2519968500393745,
"grad_norm": 0.00047751839156262577,
"learning_rate": 3.1024096385542172e-06,
"loss": -0.017,
"num_tokens": 228066886.0,
"reward": 0.9950996160507202,
"reward_std": 0.14413480460643768,
"rewards/accuracy_reward": 0.6709201335906982,
"rewards/brier_reward": 0.774049949645996,
"rewards/confidence_uniqueness_reward": 0.9144436717033386,
"rewards/format_reward": 0.9793402791023255,
"rewards/frontier_coverage_0": 0.0029952601238619537,
"rewards/frontier_coverage_1": 0.0029952601238619537,
"rewards/frontier_coverage_10": 0.002985831905971281,
"rewards/frontier_coverage_15": 0.005100842425599694,
"rewards/frontier_coverage_20": 0.011829984840005636,
"rewards/frontier_coverage_25": 0.04955209493637085,
"rewards/frontier_coverage_5": 0.002863927249563858,
"signal/accuracy_reward/centered_abs_mean": 0.16414388120174409,
"signal/accuracy_reward/group_std_mean": 0.2145601123571396,
"signal/accuracy_reward/group_zero_std_frac": 0.402777773141861,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08207194060087204,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08207194060087204,
"signal/advantage_abs_mean": 0.10552676767110825,
"signal/advantage_pre_scale_abs_mean": 0.10552676767110825,
"signal/advantage_pre_scale_std": 0.18652166426181793,
"signal/advantage_std": 0.18652166426181793,
"signal/brier_reward/centered_abs_mean": 0.13289882838726044,
"signal/brier_reward/group_std_mean": 0.17235172688961028,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01328988280147314,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01328988280147314,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05428531989455223,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0814499482512474,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00542853195220232,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00542853195220232,
"signal/format_reward/centered_abs_mean": 0.03229166679084301,
"signal/format_reward/group_std_mean": 0.056061620265245436,
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016145833395421506,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.016145833395421506,
"signal/frontier_coverage_0/centered_abs_mean": 0.06614647805690765,
"signal/frontier_coverage_0/group_std_mean": 0.09260518848896027,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009458946529775858,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009458946529775858,
"signal/frontier_coverage_1/centered_abs_mean": 0.06614647805690765,
"signal/frontier_coverage_1/group_std_mean": 0.09260518848896027,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009458946529775858,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009458946529775858,
"signal/frontier_coverage_10/centered_abs_mean": 0.06400079652667046,
"signal/frontier_coverage_10/group_std_mean": 0.089921535551548,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009152113692834973,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009152113692834973,
"signal/frontier_coverage_15/centered_abs_mean": 0.05834746509790421,
"signal/frontier_coverage_15/group_std_mean": 0.08256930112838745,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00083436876302585,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00083436876302585,
"signal/frontier_coverage_20/centered_abs_mean": 0.03862107619643211,
"signal/frontier_coverage_20/group_std_mean": 0.055405861139297484,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005522813764400781,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005522813764400781,
"signal/frontier_coverage_25/centered_abs_mean": 0.04953863024711609,
"signal/frontier_coverage_25/group_std_mean": 0.06496639400720597,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007084024371579289,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007084024371579289,
"signal/frontier_coverage_5/centered_abs_mean": 0.06591839194297791,
"signal/frontier_coverage_5/group_std_mean": 0.09233485758304597,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009426330449059606,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009426330449059606,
"step": 105
},
{
"calibration/aurc": 0.24207440577624634,
"calibration/batch_distribution_entropy": 0.6875765593814578,
"calibration/buffer_distribution_entropy": 0.7860359646652466,
"calibration/confidence_entropy": 0.4102154748788104,
"calibration/coverage@0%": 0.022595843714871777,
"calibration/coverage@1%": 0.022595843714871777,
"calibration/coverage@10%": 0.2102467277237638,
"calibration/coverage@15%": 0.2534104615446351,
"calibration/coverage@20%": 0.3359725625062341,
"calibration/coverage@25%": 0.5277390933544955,
"calibration/coverage@30%": 0.5790548647469459,
"calibration/coverage@5%": 0.11875024921629493,
"calibration/ece": 0.15191207953492325,
"calibration/mean_confidence": 0.800740167621276,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018663194444444465,
"completions/max_length": 3719.2,
"completions/max_terminated_length": 3719.2,
"completions/mean_length": 743.8192993164063,
"completions/mean_terminated_length": 758.0759155273438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.2,
"epoch": 0.2639967000412495,
"grad_norm": 0.0006374148651957512,
"learning_rate": 2.9518072289156627e-06,
"loss": -0.0149,
"num_tokens": 239744132.0,
"reward": 1.0158015012741088,
"reward_std": 0.13897253274917604,
"rewards/accuracy_reward": 0.7074652791023255,
"rewards/brier_reward": 0.7943165302276611,
"rewards/confidence_uniqueness_reward": 0.9072112798690796,
"rewards/format_reward": 0.98125,
"rewards/frontier_coverage_0": 0.002520253928378224,
"rewards/frontier_coverage_1": 0.002520253928378224,
"rewards/frontier_coverage_10": 0.002841651951894164,
"rewards/frontier_coverage_15": 0.0039699568413198,
"rewards/frontier_coverage_20": 0.013109351228922605,
"rewards/frontier_coverage_25": 0.0626706637442112,
"rewards/frontier_coverage_5": 0.0026505836751312016,
"signal/accuracy_reward/centered_abs_mean": 0.1546115458011627,
"signal/accuracy_reward/group_std_mean": 0.21346699297428132,
"signal/accuracy_reward/group_zero_std_frac": 0.3583333343267441,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07730577290058135,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07730577290058135,
"signal/advantage_abs_mean": 0.09841311872005462,
"signal/advantage_pre_scale_abs_mean": 0.09841311872005462,
"signal/advantage_pre_scale_std": 0.1800345003604889,
"signal/advantage_std": 0.1800345003604889,
"signal/brier_reward/centered_abs_mean": 0.12525416761636735,
"signal/brier_reward/group_std_mean": 0.16696035861968994,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012525417283177376,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012525417283177376,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0548114612698555,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08105210959911346,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005481146275997162,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005481146275997162,
"signal/format_reward/centered_abs_mean": 0.02778862789273262,
"signal/format_reward/group_std_mean": 0.04899119287729263,
"signal/format_reward/group_zero_std_frac": 0.8055555820465088,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01389431394636631,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01389431394636631,
"signal/frontier_coverage_0/centered_abs_mean": 0.056989597529172896,
"signal/frontier_coverage_0/group_std_mean": 0.08076736629009247,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0008149512344971299,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008149512344971299,
"signal/frontier_coverage_1/centered_abs_mean": 0.056989597529172896,
"signal/frontier_coverage_1/group_std_mean": 0.08076736629009247,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0008149512344971299,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008149512344971299,
"signal/frontier_coverage_10/centered_abs_mean": 0.05564908087253571,
"signal/frontier_coverage_10/group_std_mean": 0.07900142818689346,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007957818452268839,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007957818452268839,
"signal/frontier_coverage_15/centered_abs_mean": 0.0526436798274517,
"signal/frontier_coverage_15/group_std_mean": 0.0750760056078434,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007528046262450516,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007528046262450516,
"signal/frontier_coverage_20/centered_abs_mean": 0.037152212113142014,
"signal/frontier_coverage_20/group_std_mean": 0.05306043922901153,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005312766588758677,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005312766588758677,
"signal/frontier_coverage_25/centered_abs_mean": 0.04958587661385536,
"signal/frontier_coverage_25/group_std_mean": 0.06510179191827774,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000709078018553555,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000709078018553555,
"signal/frontier_coverage_5/centered_abs_mean": 0.05671231150627136,
"signal/frontier_coverage_5/group_std_mean": 0.08039210587739945,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008109860471449792,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008109860471449792,
"step": 110
},
{
"calibration/aurc": 0.33356083398146347,
"calibration/batch_distribution_entropy": 0.6959018763618702,
"calibration/buffer_distribution_entropy": 0.7522820617818654,
"calibration/confidence_entropy": 0.4518816489007994,
"calibration/coverage@0%": 0.0032267264856438273,
"calibration/coverage@1%": 0.0032267264856438273,
"calibration/coverage@10%": 0.044937956432167886,
"calibration/coverage@15%": 0.08441266677084883,
"calibration/coverage@20%": 0.15283620421861482,
"calibration/coverage@25%": 0.3622388517951728,
"calibration/coverage@30%": 0.5879621018367821,
"calibration/coverage@5%": 0.0032267264856438273,
"calibration/ece": 0.20717975780837206,
"calibration/mean_confidence": 0.7802978326710153,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02144097222222221,
"completions/max_length": 3724.6,
"completions/max_terminated_length": 3724.6,
"completions/mean_length": 718.7766479492187,
"completions/mean_terminated_length": 734.714306640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.2,
"epoch": 0.27599655004312446,
"grad_norm": 0.0006048035575076938,
"learning_rate": 2.8012048192771087e-06,
"loss": -0.0178,
"num_tokens": 251103639.0,
"reward": 0.9875237822532654,
"reward_std": 0.1449933499097824,
"rewards/accuracy_reward": 0.6601562619209289,
"rewards/brier_reward": 0.7643703937530517,
"rewards/confidence_uniqueness_reward": 0.9071934223175049,
"rewards/format_reward": 0.9785590291023254,
"rewards/frontier_coverage_0": 0.004726332519203425,
"rewards/frontier_coverage_1": 0.004726332519203425,
"rewards/frontier_coverage_10": 0.005253351200371981,
"rewards/frontier_coverage_15": 0.005590797681361437,
"rewards/frontier_coverage_20": 0.008738029189407826,
"rewards/frontier_coverage_25": 0.03678738847374916,
"rewards/frontier_coverage_5": 0.004789900593459606,
"signal/accuracy_reward/centered_abs_mean": 0.1631022125482559,
"signal/accuracy_reward/group_std_mean": 0.2127610206604004,
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08155110627412795,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08155110627412795,
"signal/advantage_abs_mean": 0.10652477592229843,
"signal/advantage_pre_scale_abs_mean": 0.10652477592229843,
"signal/advantage_pre_scale_std": 0.18951753973960878,
"signal/advantage_std": 0.18951753973960878,
"signal/brier_reward/centered_abs_mean": 0.13075682073831557,
"signal/brier_reward/group_std_mean": 0.17059859931468963,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013075682520866393,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013075682520866393,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05514480024576187,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08304563462734223,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005514480173587799,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005514480173587799,
"signal/format_reward/centered_abs_mean": 0.03312174491584301,
"signal/format_reward/group_std_mean": 0.05764241740107536,
"signal/format_reward/group_zero_std_frac": 0.7777777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016560872457921504,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.016560872457921504,
"signal/frontier_coverage_0/centered_abs_mean": 0.051223869621753695,
"signal/frontier_coverage_0/group_std_mean": 0.07264740690588951,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0007325013517402113,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0007325013517402113,
"signal/frontier_coverage_1/centered_abs_mean": 0.051223869621753695,
"signal/frontier_coverage_1/group_std_mean": 0.07264740690588951,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007325013517402113,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007325013517402113,
"signal/frontier_coverage_10/centered_abs_mean": 0.04941959977149964,
"signal/frontier_coverage_10/group_std_mean": 0.07031489610671997,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007067002821713686,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007067002821713686,
"signal/frontier_coverage_15/centered_abs_mean": 0.04780538156628609,
"signal/frontier_coverage_15/group_std_mean": 0.06820949018001557,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0006836169632151723,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0006836169632151723,
"signal/frontier_coverage_20/centered_abs_mean": 0.03383687846362591,
"signal/frontier_coverage_20/group_std_mean": 0.0490049920976162,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004838673456106335,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004838673456106335,
"signal/frontier_coverage_25/centered_abs_mean": 0.04597809240221977,
"signal/frontier_coverage_25/group_std_mean": 0.06020479202270508,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006574866769369691,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006574866769369691,
"signal/frontier_coverage_5/centered_abs_mean": 0.05090032443404198,
"signal/frontier_coverage_5/group_std_mean": 0.07223524823784828,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007278746110387146,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007278746110387146,
"step": 115
},
{
"calibration/aurc": 0.31065150593398905,
"calibration/batch_distribution_entropy": 0.7314189320033493,
"calibration/buffer_distribution_entropy": 0.7229353609548825,
"calibration/confidence_entropy": 0.4617376720170327,
"calibration/coverage@0%": 0.018317815283684445,
"calibration/coverage@1%": 0.018317815283684445,
"calibration/coverage@10%": 0.0586319514093389,
"calibration/coverage@15%": 0.1323828570454714,
"calibration/coverage@20%": 0.20926282095672583,
"calibration/coverage@25%": 0.5352344590321512,
"calibration/coverage@30%": 0.5989735572236128,
"calibration/coverage@5%": 0.022506296959077115,
"calibration/ece": 0.1755218508227229,
"calibration/mean_confidence": 0.7753019329613121,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015451388888888862,
"completions/max_length": 3661.4,
"completions/max_terminated_length": 3661.4,
"completions/mean_length": 682.734814453125,
"completions/mean_terminated_length": 693.4420043945313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 205.6,
"epoch": 0.28799640004499943,
"grad_norm": 0.0003586419625207782,
"learning_rate": 2.6506024096385547e-06,
"loss": -0.0125,
"num_tokens": 262050600.0,
"reward": 1.0042026162147522,
"reward_std": 0.13399964123964309,
"rewards/accuracy_reward": 0.6789930582046508,
"rewards/brier_reward": 0.7894548892974853,
"rewards/confidence_uniqueness_reward": 0.9230864763259887,
"rewards/format_reward": 0.9845486044883728,
"rewards/frontier_coverage_0": 0.007614323310554028,
"rewards/frontier_coverage_1": 0.007614323310554028,
"rewards/frontier_coverage_10": 0.007986792828887701,
"rewards/frontier_coverage_15": 0.009333854354918004,
"rewards/frontier_coverage_20": 0.01230423217639327,
"rewards/frontier_coverage_25": 0.02976319268345833,
"rewards/frontier_coverage_5": 0.007735726609826088,
"signal/accuracy_reward/centered_abs_mean": 0.15657551884651183,
"signal/accuracy_reward/group_std_mean": 0.20762513875961303,
"signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07828775942325591,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07828775942325591,
"signal/advantage_abs_mean": 0.09784245938062668,
"signal/advantage_pre_scale_abs_mean": 0.09784245938062668,
"signal/advantage_pre_scale_std": 0.17648713588714598,
"signal/advantage_std": 0.17648713588714598,
"signal/brier_reward/centered_abs_mean": 0.12497896701097488,
"signal/brier_reward/group_std_mean": 0.16387878954410554,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012497896514832973,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012497896514832973,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04354229345917702,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06585515961050988,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004354229662567377,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004354229662567377,
"signal/format_reward/centered_abs_mean": 0.02369791679084301,
"signal/format_reward/group_std_mean": 0.04182791784405708,
"signal/format_reward/group_zero_std_frac": 0.8361111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011848958395421504,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011848958395421504,
"signal/frontier_coverage_0/centered_abs_mean": 0.06305849850177765,
"signal/frontier_coverage_0/group_std_mean": 0.08656549751758576,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009017364820465446,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009017364820465446,
"signal/frontier_coverage_1/centered_abs_mean": 0.06305849850177765,
"signal/frontier_coverage_1/group_std_mean": 0.08656549751758576,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009017364820465446,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009017364820465446,
"signal/frontier_coverage_10/centered_abs_mean": 0.06107858419418335,
"signal/frontier_coverage_10/group_std_mean": 0.08405377566814423,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0008734237751923501,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008734237751923501,
"signal/frontier_coverage_15/centered_abs_mean": 0.05693260729312897,
"signal/frontier_coverage_15/group_std_mean": 0.07869968116283417,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008141362806782127,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008141362806782127,
"signal/frontier_coverage_20/centered_abs_mean": 0.041933455318212506,
"signal/frontier_coverage_20/group_std_mean": 0.05869346261024475,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005996484076604247,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005996484076604247,
"signal/frontier_coverage_25/centered_abs_mean": 0.04046922326087952,
"signal/frontier_coverage_25/group_std_mean": 0.053788629919290544,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005787098547443747,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005787098547443747,
"signal/frontier_coverage_5/centered_abs_mean": 0.06229802295565605,
"signal/frontier_coverage_5/group_std_mean": 0.08560123592615128,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008908617543056607,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008908617543056607,
"step": 120
},
{
"calibration/aurc": 0.2082183569339026,
"calibration/batch_distribution_entropy": 0.8211991327882566,
"calibration/buffer_distribution_entropy": 0.7568997286321094,
"calibration/confidence_entropy": 0.5527962206752399,
"calibration/coverage@0%": 0.022027035701369612,
"calibration/coverage@1%": 0.022027035701369612,
"calibration/coverage@10%": 0.29937981558557936,
"calibration/coverage@15%": 0.3620036508586016,
"calibration/coverage@20%": 0.4019900773574,
"calibration/coverage@25%": 0.6806566429667174,
"calibration/coverage@30%": 0.7411075469279128,
"calibration/coverage@5%": 0.08646273123942735,
"calibration/ece": 0.12462517933287802,
"calibration/mean_confidence": 0.6664884744206967,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014496527777777768,
"completions/max_length": 3502.2,
"completions/max_terminated_length": 3502.2,
"completions/mean_length": 670.2757080078125,
"completions/mean_terminated_length": 680.2117797851563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 183.4,
"epoch": 0.2999962500468744,
"grad_norm": 0.0004161894030403346,
"learning_rate": 2.5e-06,
"loss": -0.0132,
"num_tokens": 272889840.0,
"reward": 1.005896532535553,
"reward_std": 0.12880902737379074,
"rewards/accuracy_reward": 0.6809027791023254,
"rewards/brier_reward": 0.7915880799293518,
"rewards/confidence_uniqueness_reward": 0.9347058057785034,
"rewards/format_reward": 0.9855034708976745,
"rewards/frontier_coverage_0": -0.006131393508985639,
"rewards/frontier_coverage_1": -0.006131393508985639,
"rewards/frontier_coverage_10": -0.004654986085370183,
"rewards/frontier_coverage_15": -0.0017717648821417241,
"rewards/frontier_coverage_20": 0.002877543866634369,
"rewards/frontier_coverage_25": 0.02585282623767853,
"rewards/frontier_coverage_5": -0.005564809101633728,
"signal/accuracy_reward/centered_abs_mean": 0.15927734076976777,
"signal/accuracy_reward/group_std_mean": 0.20954229235649108,
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07963867038488388,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07963867038488388,
"signal/advantage_abs_mean": 0.0956741526722908,
"signal/advantage_pre_scale_abs_mean": 0.0956741526722908,
"signal/advantage_pre_scale_std": 0.16752077937126159,
"signal/advantage_std": 0.16752077937126159,
"signal/brier_reward/centered_abs_mean": 0.11655332297086715,
"signal/brier_reward/group_std_mean": 0.1505295991897583,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011655332706868648,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011655332706868648,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04199672415852547,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06219554841518402,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004199672443792224,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004199672443792224,
"signal/format_reward/centered_abs_mean": 0.023106553591787815,
"signal/format_reward/group_std_mean": 0.03924813717603683,
"signal/format_reward/group_zero_std_frac": 0.8499999880790711,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011553276795893908,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011553276795893908,
"signal/frontier_coverage_0/centered_abs_mean": 0.10545411854982376,
"signal/frontier_coverage_0/group_std_mean": 0.13934148699045182,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001507993880659342,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001507993880659342,
"signal/frontier_coverage_1/centered_abs_mean": 0.10545411854982376,
"signal/frontier_coverage_1/group_std_mean": 0.13934148699045182,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001507993880659342,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001507993880659342,
"signal/frontier_coverage_10/centered_abs_mean": 0.10189146101474762,
"signal/frontier_coverage_10/group_std_mean": 0.1348419487476349,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014570478349924087,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014570478349924087,
"signal/frontier_coverage_15/centered_abs_mean": 0.09459190368652344,
"signal/frontier_coverage_15/group_std_mean": 0.12573918104171752,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013526642229408025,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013526642229408025,
"signal/frontier_coverage_20/centered_abs_mean": 0.07673133313655853,
"signal/frontier_coverage_20/group_std_mean": 0.1030562549829483,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010972580406814814,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010972580406814814,
"signal/frontier_coverage_25/centered_abs_mean": 0.04959097653627396,
"signal/frontier_coverage_25/group_std_mean": 0.06758146658539772,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007091509876772761,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007091509876772761,
"signal/frontier_coverage_5/centered_abs_mean": 0.10424444675445557,
"signal/frontier_coverage_5/group_std_mean": 0.1378079980611801,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014906955417245626,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014906955417245626,
"step": 125
},
{
"calibration/aurc": 0.26075673968720536,
"calibration/batch_distribution_entropy": 0.874285034631829,
"calibration/buffer_distribution_entropy": 0.8284399180154496,
"calibration/confidence_entropy": 0.5473131223715086,
"calibration/coverage@0%": 0.024374274739239032,
"calibration/coverage@1%": 0.024374274739239032,
"calibration/coverage@10%": 0.13814292939717276,
"calibration/coverage@15%": 0.2927289784951862,
"calibration/coverage@20%": 0.3928601633049443,
"calibration/coverage@25%": 0.46272417076934397,
"calibration/coverage@30%": 0.6714604813734013,
"calibration/coverage@5%": 0.04966937264230805,
"calibration/ece": 0.12237342404558275,
"calibration/mean_confidence": 0.626171849921876,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021440972222222254,
"completions/max_length": 3600.2,
"completions/max_terminated_length": 3600.2,
"completions/mean_length": 685.9378540039063,
"completions/mean_terminated_length": 701.0846801757813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 168.2,
"epoch": 0.3119961000487494,
"grad_norm": 0.0004247704928275198,
"learning_rate": 2.349397590361446e-06,
"loss": -0.0169,
"num_tokens": 283916644.0,
"reward": 0.9914659857749939,
"reward_std": 0.14033911675214766,
"rewards/accuracy_reward": 0.6625868201255798,
"rewards/brier_reward": 0.7736161351203918,
"rewards/confidence_uniqueness_reward": 0.9349322438240051,
"rewards/format_reward": 0.9785590291023254,
"rewards/frontier_coverage_0": -0.010037094075232744,
"rewards/frontier_coverage_1": -0.010037094075232744,
"rewards/frontier_coverage_10": -0.008441044599749148,
"rewards/frontier_coverage_15": -0.00335610918700695,
"rewards/frontier_coverage_20": 0.008132204459980131,
"rewards/frontier_coverage_25": 0.03618508372455835,
"rewards/frontier_coverage_5": -0.009772640746086836,
"signal/accuracy_reward/centered_abs_mean": 0.17634005844593048,
"signal/accuracy_reward/group_std_mean": 0.23339370787143707,
"signal/accuracy_reward/group_zero_std_frac": 0.3388888895511627,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08817002922296524,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08817002922296524,
"signal/advantage_abs_mean": 0.10400059223175048,
"signal/advantage_pre_scale_abs_mean": 0.10400059223175048,
"signal/advantage_pre_scale_std": 0.17410335540771485,
"signal/advantage_std": 0.17410335540771485,
"signal/brier_reward/centered_abs_mean": 0.12626205682754515,
"signal/brier_reward/group_std_mean": 0.16308861076831818,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012626205757260322,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012626205757260322,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04283556342124939,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06413544788956642,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0042835562489926815,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042835562489926815,
"signal/format_reward/centered_abs_mean": 0.027652995474636555,
"signal/format_reward/group_std_mean": 0.045563656091690066,
"signal/format_reward/group_zero_std_frac": 0.8277777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013826497737318278,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013826497737318278,
"signal/frontier_coverage_0/centered_abs_mean": 0.13005276918411254,
"signal/frontier_coverage_0/group_std_mean": 0.17312212884426117,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001859754603356123,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001859754603356123,
"signal/frontier_coverage_1/centered_abs_mean": 0.13005276918411254,
"signal/frontier_coverage_1/group_std_mean": 0.17312212884426117,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001859754603356123,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001859754603356123,
"signal/frontier_coverage_10/centered_abs_mean": 0.12581277936697005,
"signal/frontier_coverage_10/group_std_mean": 0.16772884130477905,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017991228029131888,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017991228029131888,
"signal/frontier_coverage_15/centered_abs_mean": 0.11518070250749587,
"signal/frontier_coverage_15/group_std_mean": 0.1540255665779114,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016470840433612465,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016470840433612465,
"signal/frontier_coverage_20/centered_abs_mean": 0.08615544736385346,
"signal/frontier_coverage_20/group_std_mean": 0.11641546934843064,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012320228852331638,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012320228852331638,
"signal/frontier_coverage_25/centered_abs_mean": 0.05977813303470612,
"signal/frontier_coverage_25/group_std_mean": 0.07970146983861923,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008548272890038788,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008548272890038788,
"signal/frontier_coverage_5/centered_abs_mean": 0.12958541363477707,
"signal/frontier_coverage_5/group_std_mean": 0.1725340485572815,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001853071292862296,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001853071292862296,
"step": 130
},
{
"calibration/aurc": 0.2546360563314921,
"calibration/batch_distribution_entropy": 0.8330176576430549,
"calibration/buffer_distribution_entropy": 0.8734325642612234,
"calibration/confidence_entropy": 0.4818107536798132,
"calibration/coverage@0%": 0.014179309586631486,
"calibration/coverage@1%": 0.014179309586631486,
"calibration/coverage@10%": 0.23271767810026386,
"calibration/coverage@15%": 0.26649076517150394,
"calibration/coverage@20%": 0.433201793135017,
"calibration/coverage@25%": 0.5304986738889623,
"calibration/coverage@30%": 0.5728792386450393,
"calibration/coverage@5%": 0.1934078166226913,
"calibration/ece": 0.1538258270193908,
"calibration/mean_confidence": 0.7085402165749761,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011892361111111093,
"completions/max_length": 3711.6,
"completions/max_terminated_length": 3711.6,
"completions/mean_length": 664.8523559570312,
"completions/mean_terminated_length": 672.9011596679687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.8,
"epoch": 0.32399595005062437,
"grad_norm": 0.000490625505335629,
"learning_rate": 2.1987951807228917e-06,
"loss": -0.0101,
"num_tokens": 294668767.0,
"reward": 1.0130939960479737,
"reward_std": 0.1343725234270096,
"rewards/accuracy_reward": 0.6880208373069763,
"rewards/brier_reward": 0.7956640720367432,
"rewards/confidence_uniqueness_reward": 0.9440826296806335,
"rewards/format_reward": 0.9880208373069763,
"rewards/frontier_coverage_0": -0.0007627993822097778,
"rewards/frontier_coverage_1": -0.0007627993822097778,
"rewards/frontier_coverage_10": 0.0009573293849825859,
"rewards/frontier_coverage_15": 0.004394118906930089,
"rewards/frontier_coverage_20": 0.015732752112671732,
"rewards/frontier_coverage_25": 0.05771690420806408,
"rewards/frontier_coverage_5": -0.000457253772765398,
"signal/accuracy_reward/centered_abs_mean": 0.16697049140930176,
"signal/accuracy_reward/group_std_mean": 0.223639115691185,
"signal/accuracy_reward/group_zero_std_frac": 0.35555556416511536,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08348524570465088,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08348524570465088,
"signal/advantage_abs_mean": 0.09535450786352158,
"signal/advantage_pre_scale_abs_mean": 0.09535450786352158,
"signal/advantage_pre_scale_std": 0.16756429374217988,
"signal/advantage_std": 0.16756429374217988,
"signal/brier_reward/centered_abs_mean": 0.12386199980974197,
"signal/brier_reward/group_std_mean": 0.16375071704387664,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012386200204491615,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012386200204491615,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.035422375053167345,
"signal/confidence_uniqueness_reward/group_std_mean": 0.059968823194503786,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00354223744943738,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00354223744943738,
"signal/format_reward/centered_abs_mean": 0.02140842005610466,
"signal/format_reward/group_std_mean": 0.04312895014882088,
"signal/format_reward/group_zero_std_frac": 0.8166666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01070421002805233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01070421002805233,
"signal/frontier_coverage_0/centered_abs_mean": 0.11899998188018798,
"signal/frontier_coverage_0/group_std_mean": 0.15951877534389497,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001701699779368937,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001701699779368937,
"signal/frontier_coverage_1/centered_abs_mean": 0.11899998188018798,
"signal/frontier_coverage_1/group_std_mean": 0.15951877534389497,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001701699779368937,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001701699779368937,
"signal/frontier_coverage_10/centered_abs_mean": 0.11335770487785339,
"signal/frontier_coverage_10/group_std_mean": 0.1522216647863388,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001621015160344541,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001621015160344541,
"signal/frontier_coverage_15/centered_abs_mean": 0.10240471959114075,
"signal/frontier_coverage_15/group_std_mean": 0.13803330510854722,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014643874485045672,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014643874485045672,
"signal/frontier_coverage_20/centered_abs_mean": 0.0672150082886219,
"signal/frontier_coverage_20/group_std_mean": 0.0915078029036522,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009611746412701905,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009611746412701905,
"signal/frontier_coverage_25/centered_abs_mean": 0.06437275260686874,
"signal/frontier_coverage_25/group_std_mean": 0.08481907844543457,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009205303387716413,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009205303387716413,
"signal/frontier_coverage_5/centered_abs_mean": 0.11798569560050964,
"signal/frontier_coverage_5/group_std_mean": 0.15821486115455627,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00168719538487494,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00168719538487494,
"step": 135
},
{
"calibration/aurc": 0.1854829328737862,
"calibration/batch_distribution_entropy": 0.7996682424965277,
"calibration/buffer_distribution_entropy": 0.8905462538846519,
"calibration/confidence_entropy": 0.41854264390855284,
"calibration/coverage@0%": 0.04533180574298995,
"calibration/coverage@1%": 0.04533180574298995,
"calibration/coverage@10%": 0.24171738663204975,
"calibration/coverage@15%": 0.3580652283340696,
"calibration/coverage@20%": 0.5067775705274844,
"calibration/coverage@25%": 0.7254039905217915,
"calibration/coverage@30%": 0.956267313498989,
"calibration/coverage@5%": 0.12600271378231903,
"calibration/ece": 0.12841547965995068,
"calibration/mean_confidence": 0.7517699707017254,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013541666666666674,
"completions/max_length": 3653.8,
"completions/max_terminated_length": 3653.8,
"completions/mean_length": 649.6119018554688,
"completions/mean_terminated_length": 658.50859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 165.4,
"epoch": 0.33599580005249935,
"grad_norm": 0.0004042711516376585,
"learning_rate": 2.0481927710843377e-06,
"loss": -0.0109,
"num_tokens": 305256520.0,
"reward": 1.0060909271240235,
"reward_std": 0.12792308628559113,
"rewards/accuracy_reward": 0.6741319417953491,
"rewards/brier_reward": 0.7920880436897277,
"rewards/confidence_uniqueness_reward": 0.9383025646209717,
"rewards/format_reward": 0.9864583373069763,
"rewards/frontier_coverage_0": 0.014161212788894772,
"rewards/frontier_coverage_1": 0.014161212788894772,
"rewards/frontier_coverage_10": 0.014498895592987537,
"rewards/frontier_coverage_15": 0.014886665157973766,
"rewards/frontier_coverage_20": 0.027863727882504463,
"rewards/frontier_coverage_25": 0.09292519390583039,
"rewards/frontier_coverage_5": 0.014280988043174148,
"signal/accuracy_reward/centered_abs_mean": 0.15073784589767455,
"signal/accuracy_reward/group_std_mean": 0.2007976531982422,
"signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07536892294883728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07536892294883728,
"signal/advantage_abs_mean": 0.09263349771499634,
"signal/advantage_pre_scale_abs_mean": 0.09263349771499634,
"signal/advantage_pre_scale_std": 0.1665874868631363,
"signal/advantage_std": 0.1665874868631363,
"signal/brier_reward/centered_abs_mean": 0.12627332657575607,
"signal/brier_reward/group_std_mean": 0.16518832445144654,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012627332285046578,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012627332285046578,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037095585465431215,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06054994612932205,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003709558630362153,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003709558630362153,
"signal/format_reward/centered_abs_mean": 0.021560330502688886,
"signal/format_reward/group_std_mean": 0.041833048313856126,
"signal/format_reward/group_zero_std_frac": 0.8194444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010780165251344443,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010780165251344443,
"signal/frontier_coverage_0/centered_abs_mean": 0.09921992719173431,
"signal/frontier_coverage_0/group_std_mean": 0.13895856738090515,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014188449829816818,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014188449829816818,
"signal/frontier_coverage_1/centered_abs_mean": 0.09921992719173431,
"signal/frontier_coverage_1/group_std_mean": 0.13895856738090515,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014188449829816818,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014188449829816818,
"signal/frontier_coverage_10/centered_abs_mean": 0.09619618356227874,
"signal/frontier_coverage_10/group_std_mean": 0.13501449525356293,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013756054220721125,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013756054220721125,
"signal/frontier_coverage_15/centered_abs_mean": 0.08267004191875457,
"signal/frontier_coverage_15/group_std_mean": 0.1172051951289177,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011821816442534328,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011821816442534328,
"signal/frontier_coverage_20/centered_abs_mean": 0.049167075753211976,
"signal/frontier_coverage_20/group_std_mean": 0.06892770677804946,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000703089137095958,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000703089137095958,
"signal/frontier_coverage_25/centered_abs_mean": 0.08086840957403182,
"signal/frontier_coverage_25/group_std_mean": 0.10296626091003418,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011564183048903942,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011564183048903942,
"signal/frontier_coverage_5/centered_abs_mean": 0.09877839088439941,
"signal/frontier_coverage_5/group_std_mean": 0.13836795836687088,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001412531011737883,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001412531011737883,
"step": 140
},
{
"calibration/aurc": 0.1974786325318561,
"calibration/batch_distribution_entropy": 0.807348167009032,
"calibration/buffer_distribution_entropy": 0.8525369126779999,
"calibration/confidence_entropy": 0.41936921197900173,
"calibration/coverage@0%": 0.020484134148034995,
"calibration/coverage@1%": 0.020484134148034995,
"calibration/coverage@10%": 0.12842496038935974,
"calibration/coverage@15%": 0.3236565417377872,
"calibration/coverage@20%": 0.635893543720149,
"calibration/coverage@25%": 0.7277863040288939,
"calibration/coverage@30%": 0.8788835787859582,
"calibration/coverage@5%": 0.055793119011248435,
"calibration/ece": 0.12143313418001216,
"calibration/mean_confidence": 0.741192283313203,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01067708333333337,
"completions/max_length": 3261.6,
"completions/max_terminated_length": 3261.6,
"completions/mean_length": 632.3050537109375,
"completions/mean_terminated_length": 639.30107421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.4,
"epoch": 0.34799565005437433,
"grad_norm": 0.00040693863411433995,
"learning_rate": 1.8975903614457832e-06,
"loss": -0.0105,
"num_tokens": 315605282.0,
"reward": 1.0286210775375366,
"reward_std": 0.11897408664226532,
"rewards/accuracy_reward": 0.7124131917953491,
"rewards/brier_reward": 0.8128675818443298,
"rewards/confidence_uniqueness_reward": 0.9287760734558106,
"rewards/format_reward": 0.9893229246139527,
"rewards/frontier_coverage_0": 0.01579418806359172,
"rewards/frontier_coverage_1": 0.01579418806359172,
"rewards/frontier_coverage_10": 0.016339881264138968,
"rewards/frontier_coverage_15": 0.01718453587964177,
"rewards/frontier_coverage_20": 0.03750094771385193,
"rewards/frontier_coverage_25": 0.1325998529791832,
"rewards/frontier_coverage_5": 0.015739528834819792,
"signal/accuracy_reward/centered_abs_mean": 0.137353515625,
"signal/accuracy_reward/group_std_mean": 0.18777381181716918,
"signal/accuracy_reward/group_zero_std_frac": 0.4361111164093018,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686767578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0686767578125,
"signal/advantage_abs_mean": 0.08469511717557907,
"signal/advantage_pre_scale_abs_mean": 0.08469511717557907,
"signal/advantage_pre_scale_std": 0.1597517877817154,
"signal/advantage_std": 0.1597517877817154,
"signal/brier_reward/centered_abs_mean": 0.12741477489471437,
"signal/brier_reward/group_std_mean": 0.16703042685985564,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012741477787494659,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012741477787494659,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04068734273314476,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05980287864804268,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004068734264001251,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004068734264001251,
"signal/format_reward/centered_abs_mean": 0.018288845382630824,
"signal/format_reward/group_std_mean": 0.03258528374135494,
"signal/format_reward/group_zero_std_frac": 0.8750000119209289,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009144422691315412,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009144422691315412,
"signal/frontier_coverage_0/centered_abs_mean": 0.08689655661582947,
"signal/frontier_coverage_0/group_std_mean": 0.11893046051263809,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00124262070748955,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00124262070748955,
"signal/frontier_coverage_1/centered_abs_mean": 0.08689655661582947,
"signal/frontier_coverage_1/group_std_mean": 0.11893046051263809,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00124262070748955,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00124262070748955,
"signal/frontier_coverage_10/centered_abs_mean": 0.08449746146798134,
"signal/frontier_coverage_10/group_std_mean": 0.11587611138820648,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012083137058652937,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012083137058652937,
"signal/frontier_coverage_15/centered_abs_mean": 0.07269451022148132,
"signal/frontier_coverage_15/group_std_mean": 0.1005746454000473,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010395315941423178,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010395315941423178,
"signal/frontier_coverage_20/centered_abs_mean": 0.04770020917057991,
"signal/frontier_coverage_20/group_std_mean": 0.0634695328772068,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006821130053140223,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006821130053140223,
"signal/frontier_coverage_25/centered_abs_mean": 0.08773275762796402,
"signal/frontier_coverage_25/group_std_mean": 0.11324829757213592,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012545783771201967,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012545783771201967,
"signal/frontier_coverage_5/centered_abs_mean": 0.08658337146043778,
"signal/frontier_coverage_5/group_std_mean": 0.11853417456150055,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012381422566249967,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012381422566249967,
"step": 145
},
{
"calibration/aurc": 0.2009286800964662,
"calibration/batch_distribution_entropy": 0.8164642374302046,
"calibration/buffer_distribution_entropy": 0.8201398178798591,
"calibration/confidence_entropy": 0.4014055039722427,
"calibration/coverage@0%": 0.010666666666666668,
"calibration/coverage@1%": 0.010666666666666668,
"calibration/coverage@10%": 0.36389817290552584,
"calibration/coverage@15%": 0.4667266934046346,
"calibration/coverage@20%": 0.5943920900178253,
"calibration/coverage@25%": 0.6405080213903743,
"calibration/coverage@30%": 0.6833778966131907,
"calibration/coverage@5%": 0.09828333333333332,
"calibration/ece": 0.15416181123167888,
"calibration/mean_confidence": 0.7122128912211364,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009722222222222233,
"completions/max_length": 3621.2,
"completions/max_terminated_length": 3621.2,
"completions/mean_length": 713.7203979492188,
"completions/mean_terminated_length": 720.7264038085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.2,
"epoch": 0.3599955000562493,
"grad_norm": 0.0004712261143140495,
"learning_rate": 1.7469879518072292e-06,
"loss": -0.0083,
"num_tokens": 326937677.0,
"reward": 1.0151524186134337,
"reward_std": 0.1302838146686554,
"rewards/accuracy_reward": 0.6858506917953491,
"rewards/brier_reward": 0.8010693430900574,
"rewards/confidence_uniqueness_reward": 0.9333727955818176,
"rewards/format_reward": 0.9902777671813965,
"rewards/frontier_coverage_0": 0.021950625255703925,
"rewards/frontier_coverage_1": 0.021950625255703925,
"rewards/frontier_coverage_10": 0.022064855322241783,
"rewards/frontier_coverage_15": 0.022844681143760683,
"rewards/frontier_coverage_20": 0.03186333496123552,
"rewards/frontier_coverage_25": 0.11219749450683594,
"rewards/frontier_coverage_5": 0.021950625255703925,
"signal/accuracy_reward/centered_abs_mean": 0.16527235209941865,
"signal/accuracy_reward/group_std_mean": 0.21440712809562684,
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08263617604970933,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08263617604970933,
"signal/advantage_abs_mean": 0.09583060741424561,
"signal/advantage_pre_scale_abs_mean": 0.09583060741424561,
"signal/advantage_pre_scale_std": 0.1677115947008133,
"signal/advantage_std": 0.1677115947008133,
"signal/brier_reward/centered_abs_mean": 0.1361823335289955,
"signal/brier_reward/group_std_mean": 0.17782002985477446,
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013618233613669872,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013618233613669872,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039950243383646014,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06207837164402008,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003995024506002665,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003995024506002665,
"signal/format_reward/centered_abs_mean": 0.01697048614732921,
"signal/format_reward/group_std_mean": 0.03419107310473919,
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008485243073664606,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008485243073664606,
"signal/frontier_coverage_0/centered_abs_mean": 0.1078558087348938,
"signal/frontier_coverage_0/group_std_mean": 0.15047508776187896,
"signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015423380769789218,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015423380769789218,
"signal/frontier_coverage_1/centered_abs_mean": 0.1078558087348938,
"signal/frontier_coverage_1/group_std_mean": 0.15047508776187896,
"signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015423380769789218,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015423380769789218,
"signal/frontier_coverage_10/centered_abs_mean": 0.10575756281614304,
"signal/frontier_coverage_10/group_std_mean": 0.1477883592247963,
"signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015123330289497972,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015123330289497972,
"signal/frontier_coverage_15/centered_abs_mean": 0.09201570004224777,
"signal/frontier_coverage_15/group_std_mean": 0.12966947257518768,
"signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013158244779333471,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013158244779333471,
"signal/frontier_coverage_20/centered_abs_mean": 0.05600855126976967,
"signal/frontier_coverage_20/group_std_mean": 0.07790684998035431,
"signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008009222452528775,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008009222452528775,
"signal/frontier_coverage_25/centered_abs_mean": 0.08503240048885345,
"signal/frontier_coverage_25/group_std_mean": 0.1091775730252266,
"signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012159633450210094,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012159633450210094,
"signal/frontier_coverage_5/centered_abs_mean": 0.1078558087348938,
"signal/frontier_coverage_5/group_std_mean": 0.15047508776187896,
"signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015423380769789218,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015423380769789218,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_calibration/aurc": 0.1497119816911291,
"eval_calibration/batch_distribution_entropy": 0.7968346178805156,
"eval_calibration/buffer_distribution_entropy": 0.8407002187440905,
"eval_calibration/confidence_entropy": 0.4125144127540233,
"eval_calibration/coverage@0%": 0.20950940860215053,
"eval_calibration/coverage@1%": 0.20950940860215053,
"eval_calibration/coverage@10%": 0.43934811827956993,
"eval_calibration/coverage@15%": 0.5816532258064516,
"eval_calibration/coverage@20%": 0.6876680107526881,
"eval_calibration/coverage@25%": 0.8776881720430106,
"eval_calibration/coverage@30%": 0.9786626344086021,
"eval_calibration/coverage@5%": 0.2824260752688172,
"eval_calibration/ece": 0.16998011964569013,
"eval_calibration/mean_confidence": 0.7282188050854926,
"eval_completions/clipped_ratio": 0.008680555555555561,
"eval_completions/max_length": 2416.5,
"eval_completions/max_terminated_length": 2416.5,
"eval_completions/mean_length": 695.3529561360677,
"eval_completions/mean_terminated_length": 701.4133707682291,
"eval_completions/min_length": 54.666666666666664,
"eval_completions/min_terminated_length": 227.16666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 326937677.0,
"eval_reward": 1.0095117688179016,
"eval_reward_std": 0.24901040395100912,
"eval_rewards/accuracy_reward": 0.6796875099341074,
"eval_rewards/brier_reward": 0.8043731153011322,
"eval_rewards/confidence_uniqueness_reward": 0.8918871482213339,
"eval_rewards/format_reward": 0.9913194477558136,
"eval_rewards/frontier_coverage_0": 0.034088116294393934,
"eval_rewards/frontier_coverage_1": 0.034088116294393934,
"eval_rewards/frontier_coverage_10": 0.03365040601541599,
"eval_rewards/frontier_coverage_15": 0.033414963788042464,
"eval_rewards/frontier_coverage_20": 0.037281897539893784,
"eval_rewards/frontier_coverage_25": 0.0998396414021651,
"eval_rewards/frontier_coverage_5": 0.034088116294393934,
"eval_runtime": 190.7746,
"eval_samples_per_second": 5.242,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4189995676279068,
"eval_signal/accuracy_reward/group_std_mean": 0.4633843054374059,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2094997838139534,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2094997838139534,
"eval_signal/advantage_abs_mean": 0.2170354425907135,
"eval_signal/advantage_pre_scale_abs_mean": 0.2170354425907135,
"eval_signal/advantage_pre_scale_std": 0.24843567858139673,
"eval_signal/advantage_std": 0.24843567858139673,
"eval_signal/brier_reward/centered_abs_mean": 0.2193461755911509,
"eval_signal/brier_reward/group_std_mean": 0.27922573685646057,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02193461824208498,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02193461824208498,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04991401235262553,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07574755760530631,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0049914012585456176,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0049914012585456176,
"eval_signal/format_reward/centered_abs_mean": 0.016493055348594982,
"eval_signal/format_reward/group_std_mean": 0.04259948432445526,
"eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.008246527674297491,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.008246527674297491,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.1894952729344368,
"eval_signal/frontier_coverage_0/group_std_mean": 0.30272159973780316,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027097822166979313,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027097822166979313,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1894952729344368,
"eval_signal/frontier_coverage_1/group_std_mean": 0.30272159973780316,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027097822166979313,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027097822166979313,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.18484538545211157,
"eval_signal/frontier_coverage_10/group_std_mean": 0.29638702670733136,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026432890444993973,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026432890444993973,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.15993489821751913,
"eval_signal/frontier_coverage_15/group_std_mean": 0.26100187251965207,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022870690639441213,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022870690639441213,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.08387432868282,
"eval_signal/frontier_coverage_20/group_std_mean": 0.1353093981742859,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011994028852010767,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011994028852010767,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.13570485015710196,
"eval_signal/frontier_coverage_25/group_std_mean": 0.16785304248332977,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019405794446356595,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019405794446356595,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1894952729344368,
"eval_signal/frontier_coverage_5/group_std_mean": 0.30272159973780316,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027097822166979313,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027097822166979313,
"eval_steps_per_second": 0.031,
"step": 150
},
{
"calibration/aurc": 0.17029982830875182,
"calibration/batch_distribution_entropy": 0.828323867142758,
"calibration/buffer_distribution_entropy": 0.8372604232456323,
"calibration/confidence_entropy": 0.40143344705114015,
"calibration/coverage@0%": 0.002617801047120419,
"calibration/coverage@1%": 0.002617801047120419,
"calibration/coverage@10%": 0.5048941513727101,
"calibration/coverage@15%": 0.5596638265431797,
"calibration/coverage@20%": 0.6364728575365114,
"calibration/coverage@25%": 0.7473739322127307,
"calibration/coverage@30%": 0.8836594103058694,
"calibration/coverage@5%": 0.002617801047120419,
"calibration/ece": 0.13330443000274134,
"calibration/mean_confidence": 0.7272123755133635,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008940972222222232,
"completions/max_length": 3450.4,
"completions/max_terminated_length": 3450.4,
"completions/mean_length": 683.6056518554688,
"completions/mean_terminated_length": 689.88115234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.3719953500581243,
"grad_norm": 0.00048128137132152915,
"learning_rate": 1.5963855421686747e-06,
"loss": -0.0059,
"num_tokens": 337920526.0,
"reward": 1.0457221508026122,
"reward_std": 0.1301838055253029,
"rewards/accuracy_reward": 0.7413194417953491,
"rewards/brier_reward": 0.8202741265296936,
"rewards/confidence_uniqueness_reward": 0.942044448852539,
"rewards/format_reward": 0.9910590171813964,
"rewards/frontier_coverage_0": 0.0016106660943478346,
"rewards/frontier_coverage_1": 0.0016106660943478346,
"rewards/frontier_coverage_10": 0.002487003430724144,
"rewards/frontier_coverage_15": 0.008382186014205217,
"rewards/frontier_coverage_20": 0.040102506056427956,
"rewards/frontier_coverage_25": 0.17503876686096193,
"rewards/frontier_coverage_5": 0.0016106660943478346,
"signal/accuracy_reward/centered_abs_mean": 0.16593966782093048,
"signal/accuracy_reward/group_std_mean": 0.21918415725231172,
"signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08296983391046524,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08296983391046524,
"signal/advantage_abs_mean": 0.09460719525814057,
"signal/advantage_pre_scale_abs_mean": 0.09460719525814057,
"signal/advantage_pre_scale_std": 0.1647391140460968,
"signal/advantage_std": 0.1647391140460968,
"signal/brier_reward/centered_abs_mean": 0.13260589838027953,
"signal/brier_reward/group_std_mean": 0.17412539422512055,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013260589353740216,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013260589353740216,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033828570321202275,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05308753773570061,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003382857143878937,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003382857143878937,
"signal/format_reward/centered_abs_mean": 0.015771484561264516,
"signal/format_reward/group_std_mean": 0.03148765973746777,
"signal/format_reward/group_zero_std_frac": 0.8638889074325562,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007885742280632258,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007885742280632258,
"signal/frontier_coverage_0/centered_abs_mean": 0.12180711925029755,
"signal/frontier_coverage_0/group_std_mean": 0.16962958872318268,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017418418079614638,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017418418079614638,
"signal/frontier_coverage_1/centered_abs_mean": 0.12180711925029755,
"signal/frontier_coverage_1/group_std_mean": 0.16962958872318268,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017418418079614638,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017418418079614638,
"signal/frontier_coverage_10/centered_abs_mean": 0.11921639740467072,
"signal/frontier_coverage_10/group_std_mean": 0.16621364057064056,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017047945875674486,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017047945875674486,
"signal/frontier_coverage_15/centered_abs_mean": 0.0924990564584732,
"signal/frontier_coverage_15/group_std_mean": 0.13051227778196334,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001322736474685371,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001322736474685371,
"signal/frontier_coverage_20/centered_abs_mean": 0.05612751841545105,
"signal/frontier_coverage_20/group_std_mean": 0.07590975016355514,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008026235154829919,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008026235154829919,
"signal/frontier_coverage_25/centered_abs_mean": 0.11158772855997086,
"signal/frontier_coverage_25/group_std_mean": 0.14441257119178771,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015957045601680876,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015957045601680876,
"signal/frontier_coverage_5/centered_abs_mean": 0.12180711925029755,
"signal/frontier_coverage_5/group_std_mean": 0.16962958872318268,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017418418079614638,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017418418079614638,
"step": 155
},
{
"calibration/aurc": 0.13742324374109022,
"calibration/batch_distribution_entropy": 0.8046436261756036,
"calibration/buffer_distribution_entropy": 0.8454471239403271,
"calibration/confidence_entropy": 0.4209601948297349,
"calibration/coverage@0%": 0.030325589005235597,
"calibration/coverage@1%": 0.030325589005235597,
"calibration/coverage@10%": 0.6808016148896961,
"calibration/coverage@15%": 0.7603269147084422,
"calibration/coverage@20%": 0.781201044386423,
"calibration/coverage@25%": 0.8,
"calibration/coverage@30%": 0.8410526315789474,
"calibration/coverage@5%": 0.3801644545744061,
"calibration/ece": 0.12313236262405054,
"calibration/mean_confidence": 0.7447349905237983,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010069444444444442,
"completions/max_length": 3256.0,
"completions/max_terminated_length": 3256.0,
"completions/mean_length": 697.6481811523438,
"completions/mean_terminated_length": 704.7627807617188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.6,
"epoch": 0.38399520005999926,
"grad_norm": 0.00042031033081002533,
"learning_rate": 1.4457831325301204e-06,
"loss": -0.0081,
"num_tokens": 349044729.0,
"reward": 1.011973214149475,
"reward_std": 0.1254075601696968,
"rewards/accuracy_reward": 0.6768229246139527,
"rewards/brier_reward": 0.7973306894302368,
"rewards/confidence_uniqueness_reward": 0.9424768567085267,
"rewards/format_reward": 0.98984375,
"rewards/frontier_coverage_0": 0.020558654330670834,
"rewards/frontier_coverage_1": 0.020558654330670834,
"rewards/frontier_coverage_10": 0.020932418294250965,
"rewards/frontier_coverage_15": 0.022469326481223108,
"rewards/frontier_coverage_20": 0.0480774313211441,
"rewards/frontier_coverage_25": 0.17265710532665252,
"rewards/frontier_coverage_5": 0.020558654330670834,
"signal/accuracy_reward/centered_abs_mean": 0.15957573652267457,
"signal/accuracy_reward/group_std_mean": 0.20743002891540527,
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07978786826133728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07978786826133728,
"signal/advantage_abs_mean": 0.09231876432895661,
"signal/advantage_pre_scale_abs_mean": 0.09231876432895661,
"signal/advantage_pre_scale_std": 0.16247815191745757,
"signal/advantage_std": 0.16247815191745757,
"signal/brier_reward/centered_abs_mean": 0.13798875659704207,
"signal/brier_reward/group_std_mean": 0.17662995755672456,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013798876665532589,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013798876665532589,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033293415978550914,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05347738191485405,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003329341718927026,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003329341718927026,
"signal/format_reward/centered_abs_mean": 0.01769205741584301,
"signal/format_reward/group_std_mean": 0.03484956584870815,
"signal/format_reward/group_zero_std_frac": 0.85,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008846028707921505,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008846028707921505,
"signal/frontier_coverage_0/centered_abs_mean": 0.13067993819713591,
"signal/frontier_coverage_0/group_std_mean": 0.17672376036643983,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018687231000512837,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018687231000512837,
"signal/frontier_coverage_1/centered_abs_mean": 0.13067993819713591,
"signal/frontier_coverage_1/group_std_mean": 0.17672376036643983,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018687231000512837,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018687231000512837,
"signal/frontier_coverage_10/centered_abs_mean": 0.1259875252842903,
"signal/frontier_coverage_10/group_std_mean": 0.17063040137290955,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018016215413808822,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018016215413808822,
"signal/frontier_coverage_15/centered_abs_mean": 0.09146946370601654,
"signal/frontier_coverage_15/group_std_mean": 0.12508394569158554,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001308013335801661,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001308013335801661,
"signal/frontier_coverage_20/centered_abs_mean": 0.058338577300310133,
"signal/frontier_coverage_20/group_std_mean": 0.07590894401073456,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008342416607774794,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008342416607774794,
"signal/frontier_coverage_25/centered_abs_mean": 0.11929452270269394,
"signal/frontier_coverage_25/group_std_mean": 0.152734637260437,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001705911778844893,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001705911778844893,
"signal/frontier_coverage_5/centered_abs_mean": 0.13067993819713591,
"signal/frontier_coverage_5/group_std_mean": 0.17672376036643983,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018687231000512837,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018687231000512837,
"step": 160
},
{
"calibration/aurc": 0.1656603853248881,
"calibration/batch_distribution_entropy": 0.8566291969090212,
"calibration/buffer_distribution_entropy": 0.8535527032072778,
"calibration/confidence_entropy": 0.40443817859718545,
"calibration/coverage@0%": 0.017381097120307547,
"calibration/coverage@1%": 0.017381097120307547,
"calibration/coverage@10%": 0.49995407558530747,
"calibration/coverage@15%": 0.5949152537180693,
"calibration/coverage@20%": 0.6571557394509561,
"calibration/coverage@25%": 0.7211049372044049,
"calibration/coverage@30%": 0.8517150160293824,
"calibration/coverage@5%": 0.057801044626869226,
"calibration/ece": 0.11454567795901449,
"calibration/mean_confidence": 0.667435459092505,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013107638888888884,
"completions/max_length": 3570.0,
"completions/max_terminated_length": 3570.0,
"completions/mean_length": 726.2890747070312,
"completions/mean_terminated_length": 736.1373657226562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.6,
"epoch": 0.39599505006187424,
"grad_norm": 0.00045935352682136,
"learning_rate": 1.2951807228915664e-06,
"loss": -0.0097,
"num_tokens": 360550651.0,
"reward": 1.0085692167282105,
"reward_std": 0.12360656410455703,
"rewards/accuracy_reward": 0.6678819537162781,
"rewards/brier_reward": 0.8069667339324951,
"rewards/confidence_uniqueness_reward": 0.9387310028076172,
"rewards/format_reward": 0.9868923664093018,
"rewards/frontier_coverage_0": 0.04505334049463272,
"rewards/frontier_coverage_1": 0.04505334049463272,
"rewards/frontier_coverage_10": 0.044467170163989066,
"rewards/frontier_coverage_15": 0.041498401761054994,
"rewards/frontier_coverage_20": 0.06552209258079529,
"rewards/frontier_coverage_25": 0.17571614384651185,
"rewards/frontier_coverage_5": 0.045087074488401414,
"signal/accuracy_reward/centered_abs_mean": 0.14957682192325591,
"signal/accuracy_reward/group_std_mean": 0.20039042532444,
"signal/accuracy_reward/group_zero_std_frac": 0.4194444417953491,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07478841096162796,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07478841096162796,
"signal/advantage_abs_mean": 0.09047110676765442,
"signal/advantage_pre_scale_abs_mean": 0.09047110676765442,
"signal/advantage_pre_scale_std": 0.16116170585155487,
"signal/advantage_std": 0.16116170585155487,
"signal/brier_reward/centered_abs_mean": 0.13711402416229249,
"signal/brier_reward/group_std_mean": 0.17915517389774321,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013711402378976344,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013711402378976344,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03691897690296173,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05648680925369263,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036918976344168185,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036918976344168185,
"signal/format_reward/centered_abs_mean": 0.020296223647892474,
"signal/format_reward/group_std_mean": 0.03649218082427978,
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010148111823946237,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010148111823946237,
"signal/frontier_coverage_0/centered_abs_mean": 0.13668433278799058,
"signal/frontier_coverage_0/group_std_mean": 0.1877914160490036,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019545859657227995,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019545859657227995,
"signal/frontier_coverage_1/centered_abs_mean": 0.13668433278799058,
"signal/frontier_coverage_1/group_std_mean": 0.1877914160490036,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019545859657227995,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019545859657227995,
"signal/frontier_coverage_10/centered_abs_mean": 0.13133783787488937,
"signal/frontier_coverage_10/group_std_mean": 0.18079141676425933,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018781311810016632,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018781311810016632,
"signal/frontier_coverage_15/centered_abs_mean": 0.08820350021123886,
"signal/frontier_coverage_15/group_std_mean": 0.12286703139543534,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012613100348971783,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012613100348971783,
"signal/frontier_coverage_20/centered_abs_mean": 0.062378589808940885,
"signal/frontier_coverage_20/group_std_mean": 0.0826146811246872,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008920137654058636,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008920137654058636,
"signal/frontier_coverage_25/centered_abs_mean": 0.11272307336330414,
"signal/frontier_coverage_25/group_std_mean": 0.14559331238269807,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016119398642331362,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016119398642331362,
"signal/frontier_coverage_5/centered_abs_mean": 0.1365072175860405,
"signal/frontier_coverage_5/group_std_mean": 0.1875326693058014,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019520531874150037,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019520531874150037,
"step": 165
},
{
"calibration/aurc": 0.13592004663092033,
"calibration/batch_distribution_entropy": 0.7486099576692926,
"calibration/buffer_distribution_entropy": 0.8564281533478454,
"calibration/confidence_entropy": 0.37029655417641594,
"calibration/coverage@0%": 0.033480513444424695,
"calibration/coverage@1%": 0.07117684852295873,
"calibration/coverage@10%": 0.4261042923170245,
"calibration/coverage@15%": 0.6691992052004629,
"calibration/coverage@20%": 0.7751633169909933,
"calibration/coverage@25%": 0.8759031317778053,
"calibration/coverage@30%": 0.9725213090748339,
"calibration/coverage@5%": 0.1496657690046888,
"calibration/ece": 0.09613408015602516,
"calibration/mean_confidence": 0.7586426281695957,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009548611111111138,
"completions/max_length": 3430.4,
"completions/max_terminated_length": 3430.4,
"completions/mean_length": 703.7692016601562,
"completions/mean_terminated_length": 710.5459350585937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.4079949000637492,
"grad_norm": 0.0004403712518978864,
"learning_rate": 1.1445783132530121e-06,
"loss": -0.0087,
"num_tokens": 371747256.0,
"reward": 1.0362069845199584,
"reward_std": 0.12289563715457916,
"rewards/accuracy_reward": 0.722569465637207,
"rewards/brier_reward": 0.8146629929542542,
"rewards/confidence_uniqueness_reward": 0.9312249541282653,
"rewards/format_reward": 0.9904513955116272,
"rewards/frontier_coverage_0": 0.015381347015500068,
"rewards/frontier_coverage_1": 0.015381347015500068,
"rewards/frontier_coverage_10": 0.016636411473155022,
"rewards/frontier_coverage_15": 0.02498224622104317,
"rewards/frontier_coverage_20": 0.07068880349397659,
"rewards/frontier_coverage_25": 0.19831772446632384,
"rewards/frontier_coverage_5": 0.01580010838806629,
"signal/accuracy_reward/centered_abs_mean": 0.1472873270511627,
"signal/accuracy_reward/group_std_mean": 0.19956836700439454,
"signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07364366352558135,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07364366352558135,
"signal/advantage_abs_mean": 0.08684393763542175,
"signal/advantage_pre_scale_abs_mean": 0.08684393763542175,
"signal/advantage_pre_scale_std": 0.1598696678876877,
"signal/advantage_std": 0.1598696678876877,
"signal/brier_reward/centered_abs_mean": 0.13169292807579042,
"signal/brier_reward/group_std_mean": 0.17282358705997466,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013169292360544205,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013169292360544205,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039322879165410995,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06056636646389961,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003932288242504,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003932288242504,
"signal/format_reward/centered_abs_mean": 0.01654730923473835,
"signal/format_reward/group_std_mean": 0.033677156642079355,
"signal/format_reward/group_zero_std_frac": 0.85,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008273654617369175,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008273654617369175,
"signal/frontier_coverage_0/centered_abs_mean": 0.11836729645729065,
"signal/frontier_coverage_0/group_std_mean": 0.16220209896564483,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016926524229347705,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016926524229347705,
"signal/frontier_coverage_1/centered_abs_mean": 0.11836729645729065,
"signal/frontier_coverage_1/group_std_mean": 0.16220209896564483,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016926524229347705,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016926524229347705,
"signal/frontier_coverage_10/centered_abs_mean": 0.11148134768009185,
"signal/frontier_coverage_10/group_std_mean": 0.15295161604881286,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015941831981763244,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015941831981763244,
"signal/frontier_coverage_15/centered_abs_mean": 0.07999386936426163,
"signal/frontier_coverage_15/group_std_mean": 0.10980904847383499,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001143912342377007,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001143912342377007,
"signal/frontier_coverage_20/centered_abs_mean": 0.0641142837703228,
"signal/frontier_coverage_20/group_std_mean": 0.08305520564317703,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009168342221528292,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009168342221528292,
"signal/frontier_coverage_25/centered_abs_mean": 0.11506871432065964,
"signal/frontier_coverage_25/group_std_mean": 0.14938458502292634,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016454826574772597,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016454826574772597,
"signal/frontier_coverage_5/centered_abs_mean": 0.11742766797542573,
"signal/frontier_coverage_5/group_std_mean": 0.1609276831150055,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001679215719923377,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001679215719923377,
"step": 170
},
{
"calibration/aurc": 0.13348262780460535,
"calibration/batch_distribution_entropy": 0.8223084451849527,
"calibration/buffer_distribution_entropy": 0.8346515524361726,
"calibration/confidence_entropy": 0.3854238749390199,
"calibration/coverage@0%": 0.036539926699624505,
"calibration/coverage@1%": 0.036539926699624505,
"calibration/coverage@10%": 0.422825227026967,
"calibration/coverage@15%": 0.5584091115953049,
"calibration/coverage@20%": 0.8452149741820681,
"calibration/coverage@25%": 0.9379946198087186,
"calibration/coverage@30%": 0.9821740872231969,
"calibration/coverage@5%": 0.13151984499498745,
"calibration/ece": 0.0950542819703474,
"calibration/mean_confidence": 0.7023832506280211,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012934027777777812,
"completions/max_length": 3636.2,
"completions/max_terminated_length": 3636.2,
"completions/mean_length": 736.4591186523437,
"completions/mean_terminated_length": 746.1770385742187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.8,
"epoch": 0.4199947500656242,
"grad_norm": 0.00045567096094600856,
"learning_rate": 9.93975903614458e-07,
"loss": -0.0117,
"num_tokens": 383339233.0,
"reward": 1.0289367198944093,
"reward_std": 0.1303061842918396,
"rewards/accuracy_reward": 0.71171875,
"rewards/brier_reward": 0.8070045828819274,
"rewards/confidence_uniqueness_reward": 0.9229098796844483,
"rewards/format_reward": 0.9870659708976746,
"rewards/frontier_coverage_0": 0.01920226626098156,
"rewards/frontier_coverage_1": 0.01920226626098156,
"rewards/frontier_coverage_10": 0.01852501593530178,
"rewards/frontier_coverage_15": 0.029568823985755444,
"rewards/frontier_coverage_20": 0.10787947475910187,
"rewards/frontier_coverage_25": 0.2446742206811905,
"rewards/frontier_coverage_5": 0.019195317476987838,
"signal/accuracy_reward/centered_abs_mean": 0.1561903178691864,
"signal/accuracy_reward/group_std_mean": 0.2148987740278244,
"signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0780951589345932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0780951589345932,
"signal/advantage_abs_mean": 0.0919778436422348,
"signal/advantage_pre_scale_abs_mean": 0.0919778436422348,
"signal/advantage_pre_scale_std": 0.16701272428035735,
"signal/advantage_std": 0.16701272428035735,
"signal/brier_reward/centered_abs_mean": 0.14022985696792603,
"signal/brier_reward/group_std_mean": 0.18417258262634278,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014022985659539699,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014022985659539699,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04660597518086433,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06790307313203811,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046605975832790135,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046605975832790135,
"signal/format_reward/centered_abs_mean": 0.02120768204331398,
"signal/format_reward/group_std_mean": 0.03753828890621662,
"signal/format_reward/group_zero_std_frac": 0.8499999880790711,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01060384102165699,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01060384102165699,
"signal/frontier_coverage_0/centered_abs_mean": 0.13295696824789047,
"signal/frontier_coverage_0/group_std_mean": 0.18069115579128264,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019012847449630498,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019012847449630498,
"signal/frontier_coverage_1/centered_abs_mean": 0.13295696824789047,
"signal/frontier_coverage_1/group_std_mean": 0.18069115579128264,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019012847449630498,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019012847449630498,
"signal/frontier_coverage_10/centered_abs_mean": 0.11907797455787658,
"signal/frontier_coverage_10/group_std_mean": 0.16228229701519012,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017028149915859104,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017028149915859104,
"signal/frontier_coverage_15/centered_abs_mean": 0.07870914041996002,
"signal/frontier_coverage_15/group_std_mean": 0.10632596611976623,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00112554068909958,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00112554068909958,
"signal/frontier_coverage_20/centered_abs_mean": 0.08249068260192871,
"signal/frontier_coverage_20/group_std_mean": 0.10877174586057663,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011796167120337487,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011796167120337487,
"signal/frontier_coverage_25/centered_abs_mean": 0.1382613003253937,
"signal/frontier_coverage_25/group_std_mean": 0.1832427829504013,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019771367078647017,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019771367078647017,
"signal/frontier_coverage_5/centered_abs_mean": 0.13113310188055038,
"signal/frontier_coverage_5/group_std_mean": 0.17831478118896485,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018752032425254582,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018752032425254582,
"step": 175
},
{
"calibration/aurc": 0.09345007181896194,
"calibration/batch_distribution_entropy": 0.801623303579988,
"calibration/buffer_distribution_entropy": 0.8262189453353539,
"calibration/confidence_entropy": 0.38592001304418866,
"calibration/coverage@0%": 0.08496741084676991,
"calibration/coverage@1%": 0.14393717568439587,
"calibration/coverage@10%": 0.6195064116269997,
"calibration/coverage@15%": 0.8078410920640862,
"calibration/coverage@20%": 0.9067183277342291,
"calibration/coverage@25%": 0.9640873460246361,
"calibration/coverage@30%": 0.9936842105263158,
"calibration/coverage@5%": 0.3787979947574124,
"calibration/ece": 0.09400609019464161,
"calibration/mean_confidence": 0.7386594045732192,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011631944444444441,
"completions/max_length": 3725.4,
"completions/max_terminated_length": 3725.4,
"completions/mean_length": 705.6876831054688,
"completions/mean_terminated_length": 713.9794677734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 216.8,
"epoch": 0.4319946000674992,
"grad_norm": 0.0004313621611800045,
"learning_rate": 8.433734939759036e-07,
"loss": -0.0093,
"num_tokens": 394568723.0,
"reward": 1.0281787872314454,
"reward_std": 0.12547616958618163,
"rewards/accuracy_reward": 0.7125000119209289,
"rewards/brier_reward": 0.7898874640464782,
"rewards/confidence_uniqueness_reward": 0.9186937093734742,
"rewards/format_reward": 0.9881944537162781,
"rewards/frontier_coverage_0": 0.010317787062376738,
"rewards/frontier_coverage_1": 0.010276203881949187,
"rewards/frontier_coverage_10": 0.013230977579951286,
"rewards/frontier_coverage_15": 0.0318543815985322,
"rewards/frontier_coverage_20": 0.12444168329238892,
"rewards/frontier_coverage_25": 0.2868518948554993,
"rewards/frontier_coverage_5": 0.010678381472826005,
"signal/accuracy_reward/centered_abs_mean": 0.15108506977558137,
"signal/accuracy_reward/group_std_mean": 0.1995271176099777,
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07554253488779068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07554253488779068,
"signal/advantage_abs_mean": 0.09149419367313386,
"signal/advantage_pre_scale_abs_mean": 0.09149419367313386,
"signal/advantage_pre_scale_std": 0.16792958378791809,
"signal/advantage_std": 0.16792958378791809,
"signal/brier_reward/centered_abs_mean": 0.140881010890007,
"signal/brier_reward/group_std_mean": 0.1828139305114746,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01408810093998909,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01408810093998909,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04758927077054977,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06954466402530671,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004758927039802074,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004758927039802074,
"signal/format_reward/centered_abs_mean": 0.019726562313735485,
"signal/format_reward/group_std_mean": 0.037197813764214514,
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009863281156867743,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009863281156867743,
"signal/frontier_coverage_0/centered_abs_mean": 0.12390959560871125,
"signal/frontier_coverage_0/group_std_mean": 0.17109252214431764,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017719071358442307,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017719071358442307,
"signal/frontier_coverage_1/centered_abs_mean": 0.12380760312080383,
"signal/frontier_coverage_1/group_std_mean": 0.17095208466053008,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017704485915601253,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017704485915601253,
"signal/frontier_coverage_10/centered_abs_mean": 0.10980342477560043,
"signal/frontier_coverage_10/group_std_mean": 0.1521240144968033,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015701889526098967,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015701889526098967,
"signal/frontier_coverage_15/centered_abs_mean": 0.0799461305141449,
"signal/frontier_coverage_15/group_std_mean": 0.10864663273096084,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011432296945713461,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011432296945713461,
"signal/frontier_coverage_20/centered_abs_mean": 0.08835995942354202,
"signal/frontier_coverage_20/group_std_mean": 0.11567166298627854,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001263547409325838,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001263547409325838,
"signal/frontier_coverage_25/centered_abs_mean": 0.16603110134601592,
"signal/frontier_coverage_25/group_std_mean": 0.21807546317577362,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023742446210235357,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023742446210235357,
"signal/frontier_coverage_5/centered_abs_mean": 0.1223075494170189,
"signal/frontier_coverage_5/group_std_mean": 0.16897510588169098,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017489979742094874,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017489979742094874,
"step": 180
},
{
"calibration/aurc": 0.17122914709216733,
"calibration/batch_distribution_entropy": 0.7880253868471856,
"calibration/buffer_distribution_entropy": 0.8186282369699118,
"calibration/confidence_entropy": 0.3642710142144591,
"calibration/coverage@0%": 0.018777247401564758,
"calibration/coverage@1%": 0.018777247401564758,
"calibration/coverage@10%": 0.1579079193038335,
"calibration/coverage@15%": 0.5548855949538294,
"calibration/coverage@20%": 0.8632928767984499,
"calibration/coverage@25%": 0.9036374379904082,
"calibration/coverage@30%": 0.9350785340314136,
"calibration/coverage@5%": 0.018777247401564758,
"calibration/ece": 0.14703350637447332,
"calibration/mean_confidence": 0.7192671705590277,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014843750000000022,
"completions/max_length": 3571.8,
"completions/max_terminated_length": 3571.8,
"completions/mean_length": 706.883251953125,
"completions/mean_terminated_length": 717.6452026367188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 183.0,
"epoch": 0.44399445006937416,
"grad_norm": 0.00048535055248066783,
"learning_rate": 6.927710843373495e-07,
"loss": -0.0105,
"num_tokens": 405802066.0,
"reward": 1.014847993850708,
"reward_std": 0.13151782006025314,
"rewards/accuracy_reward": 0.686718761920929,
"rewards/brier_reward": 0.7976317048072815,
"rewards/confidence_uniqueness_reward": 0.922605574131012,
"rewards/format_reward": 0.9850694537162781,
"rewards/frontier_coverage_0": 0.026550618838518857,
"rewards/frontier_coverage_1": 0.026589373406022788,
"rewards/frontier_coverage_10": 0.027571763657033444,
"rewards/frontier_coverage_15": 0.03620800599455833,
"rewards/frontier_coverage_20": 0.08736573904752731,
"rewards/frontier_coverage_25": 0.25345793068408967,
"rewards/frontier_coverage_5": 0.026884720474481583,
"signal/accuracy_reward/centered_abs_mean": 0.1610948324203491,
"signal/accuracy_reward/group_std_mean": 0.21088041365146637,
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08054741621017455,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08054741621017455,
"signal/advantage_abs_mean": 0.09806140363216401,
"signal/advantage_pre_scale_abs_mean": 0.09806140363216401,
"signal/advantage_pre_scale_std": 0.16944925785064696,
"signal/advantage_std": 0.16944925785064696,
"signal/brier_reward/centered_abs_mean": 0.14560845494270325,
"signal/brier_reward/group_std_mean": 0.18695748448371888,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014560846239328384,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014560846239328384,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045430511236190796,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06660099476575851,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00454305112361908,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00454305112361908,
"signal/format_reward/centered_abs_mean": 0.0214952252805233,
"signal/format_reward/group_std_mean": 0.037768884748220447,
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01074761264026165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01074761264026165,
"signal/frontier_coverage_0/centered_abs_mean": 0.13311404585838318,
"signal/frontier_coverage_0/group_std_mean": 0.18202302753925323,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019035307923331857,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019035307923331857,
"signal/frontier_coverage_1/centered_abs_mean": 0.1330111652612686,
"signal/frontier_coverage_1/group_std_mean": 0.18189111053943635,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019020596519112587,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019020596519112587,
"signal/frontier_coverage_10/centered_abs_mean": 0.12245990186929703,
"signal/frontier_coverage_10/group_std_mean": 0.16796686351299286,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001751176593825221,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001751176593825221,
"signal/frontier_coverage_15/centered_abs_mean": 0.09790500402450561,
"signal/frontier_coverage_15/group_std_mean": 0.13461994379758835,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014000415336340665,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014000415336340665,
"signal/frontier_coverage_20/centered_abs_mean": 0.07675344049930573,
"signal/frontier_coverage_20/group_std_mean": 0.10184175372123719,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010975741781294346,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010975741781294346,
"signal/frontier_coverage_25/centered_abs_mean": 0.1587800681591034,
"signal/frontier_coverage_25/group_std_mean": 0.20560413897037505,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022705549374222754,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022705549374222754,
"signal/frontier_coverage_5/centered_abs_mean": 0.1313490241765976,
"signal/frontier_coverage_5/group_std_mean": 0.17971158623695374,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018782909493893385,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018782909493893385,
"step": 185
},
{
"calibration/aurc": 0.18193512910285387,
"calibration/batch_distribution_entropy": 0.7778199175476095,
"calibration/buffer_distribution_entropy": 0.8242070841531243,
"calibration/confidence_entropy": 0.3678612367693591,
"calibration/coverage@0%": 0.05865979043198364,
"calibration/coverage@1%": 0.05865979043198364,
"calibration/coverage@10%": 0.34553174257808716,
"calibration/coverage@15%": 0.38030753968253966,
"calibration/coverage@20%": 0.586945316731242,
"calibration/coverage@25%": 0.854896653543307,
"calibration/coverage@30%": 0.9696645341207348,
"calibration/coverage@5%": 0.2798431520853192,
"calibration/ece": 0.12763212133180607,
"calibration/mean_confidence": 0.7513209126914786,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00824652777777779,
"completions/max_length": 3541.6,
"completions/max_terminated_length": 3541.6,
"completions/mean_length": 699.0154541015625,
"completions/mean_terminated_length": 704.8328979492187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 183.0,
"epoch": 0.45599430007124914,
"grad_norm": 0.00043432877282612026,
"learning_rate": 5.421686746987952e-07,
"loss": -0.0066,
"num_tokens": 416937668.0,
"reward": 1.0384333848953247,
"reward_std": 0.12415469735860825,
"rewards/accuracy_reward": 0.7216145873069764,
"rewards/brier_reward": 0.8176455974578858,
"rewards/confidence_uniqueness_reward": 0.9341374635696411,
"rewards/format_reward": 0.9917534708976745,
"rewards/frontier_coverage_0": 0.02207240234129131,
"rewards/frontier_coverage_1": 0.022145295469090342,
"rewards/frontier_coverage_10": 0.023324301373213528,
"rewards/frontier_coverage_15": 0.0292608555406332,
"rewards/frontier_coverage_20": 0.07275687083601952,
"rewards/frontier_coverage_25": 0.2676252216100693,
"rewards/frontier_coverage_5": 0.0223290272988379,
"signal/accuracy_reward/centered_abs_mean": 0.16108398735523224,
"signal/accuracy_reward/group_std_mean": 0.2116878628730774,
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08054199367761612,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08054199367761612,
"signal/advantage_abs_mean": 0.08960044384002686,
"signal/advantage_pre_scale_abs_mean": 0.08960044384002686,
"signal/advantage_pre_scale_std": 0.16030249297618865,
"signal/advantage_std": 0.16030249297618865,
"signal/brier_reward/centered_abs_mean": 0.13036527633666992,
"signal/brier_reward/group_std_mean": 0.17200060486793517,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013036527484655381,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013036527484655381,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0354267667979002,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05462343618273735,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003542676754295826,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003542676754295826,
"signal/format_reward/centered_abs_mean": 0.01468641497194767,
"signal/format_reward/group_std_mean": 0.02957034520804882,
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007343207485973835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007343207485973835,
"signal/frontier_coverage_0/centered_abs_mean": 0.1278734177350998,
"signal/frontier_coverage_0/group_std_mean": 0.17466306388378144,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018285899190232159,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018285899190232159,
"signal/frontier_coverage_1/centered_abs_mean": 0.12777684330940248,
"signal/frontier_coverage_1/group_std_mean": 0.1745421200990677,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018272089073434472,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018272089073434472,
"signal/frontier_coverage_10/centered_abs_mean": 0.12328650057315826,
"signal/frontier_coverage_10/group_std_mean": 0.16869595050811767,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017629968700930477,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017629968700930477,
"signal/frontier_coverage_15/centered_abs_mean": 0.10532844662666321,
"signal/frontier_coverage_15/group_std_mean": 0.14432147443294524,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015061968471854926,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015061968471854926,
"signal/frontier_coverage_20/centered_abs_mean": 0.07010861709713936,
"signal/frontier_coverage_20/group_std_mean": 0.09235068708658219,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010025532450526954,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010025532450526954,
"signal/frontier_coverage_25/centered_abs_mean": 0.1480533003807068,
"signal/frontier_coverage_25/group_std_mean": 0.19292950630187988,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002117162151262164,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002117162151262164,
"signal/frontier_coverage_5/centered_abs_mean": 0.12679695785045625,
"signal/frontier_coverage_5/group_std_mean": 0.17328327000141144,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018131964607164264,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018131964607164264,
"step": 190
},
{
"calibration/aurc": 0.19111161916549027,
"calibration/batch_distribution_entropy": 0.8201703152347986,
"calibration/buffer_distribution_entropy": 0.8106126533134101,
"calibration/confidence_entropy": 0.37700403449513026,
"calibration/coverage@0%": 0.015711122047244095,
"calibration/coverage@1%": 0.015711122047244095,
"calibration/coverage@10%": 0.27973864449795016,
"calibration/coverage@15%": 0.5170787035530683,
"calibration/coverage@20%": 0.5962166628706536,
"calibration/coverage@25%": 0.6621170432853951,
"calibration/coverage@30%": 0.7699052081299754,
"calibration/coverage@5%": 0.11544865485564304,
"calibration/ece": 0.14145817553412607,
"calibration/mean_confidence": 0.6937342965939054,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012673611111111094,
"completions/max_length": 3786.0,
"completions/max_terminated_length": 3786.0,
"completions/mean_length": 717.1263916015625,
"completions/mean_terminated_length": 726.4796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.2,
"epoch": 0.46799415007312406,
"grad_norm": 0.0004493010346777737,
"learning_rate": 3.91566265060241e-07,
"loss": -0.0096,
"num_tokens": 428279828.0,
"reward": 1.008443033695221,
"reward_std": 0.12740874141454697,
"rewards/accuracy_reward": 0.6736111044883728,
"rewards/brier_reward": 0.7872533559799194,
"rewards/confidence_uniqueness_reward": 0.9333649158477784,
"rewards/format_reward": 0.9872395873069764,
"rewards/frontier_coverage_0": 0.027697153389453888,
"rewards/frontier_coverage_1": 0.027658072859048845,
"rewards/frontier_coverage_10": 0.027347087673842908,
"rewards/frontier_coverage_15": 0.030936553701758386,
"rewards/frontier_coverage_20": 0.06847289353609085,
"rewards/frontier_coverage_25": 0.20654830634593963,
"rewards/frontier_coverage_5": 0.027833018451929092,
"signal/accuracy_reward/centered_abs_mean": 0.1546983540058136,
"signal/accuracy_reward/group_std_mean": 0.2061179220676422,
"signal/accuracy_reward/group_zero_std_frac": 0.40555556416511535,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0773491770029068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0773491770029068,
"signal/advantage_abs_mean": 0.09210169464349746,
"signal/advantage_pre_scale_abs_mean": 0.09210169464349746,
"signal/advantage_pre_scale_std": 0.16345611214637756,
"signal/advantage_std": 0.16345611214637756,
"signal/brier_reward/centered_abs_mean": 0.14198019355535507,
"signal/brier_reward/group_std_mean": 0.18473336696624756,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014198019355535507,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014198019355535507,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03743585646152496,
"signal/confidence_uniqueness_reward/group_std_mean": 0.058041921257972716,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037435856182128193,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037435856182128193,
"signal/format_reward/centered_abs_mean": 0.019276259280741215,
"signal/format_reward/group_std_mean": 0.036431630700826646,
"signal/format_reward/group_zero_std_frac": 0.8472222447395324,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009638129640370608,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009638129640370608,
"signal/frontier_coverage_0/centered_abs_mean": 0.12945400625467302,
"signal/frontier_coverage_0/group_std_mean": 0.1761310279369354,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001851192256435752,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001851192256435752,
"signal/frontier_coverage_1/centered_abs_mean": 0.12929727286100387,
"signal/frontier_coverage_1/group_std_mean": 0.17592544853687286,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018489510286599398,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018489510286599398,
"signal/frontier_coverage_10/centered_abs_mean": 0.12513509392738342,
"signal/frontier_coverage_10/group_std_mean": 0.17050479352474213,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001789431762881577,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001789431762881577,
"signal/frontier_coverage_15/centered_abs_mean": 0.10048199743032456,
"signal/frontier_coverage_15/group_std_mean": 0.13747784048318862,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014368925243616105,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014368925243616105,
"signal/frontier_coverage_20/centered_abs_mean": 0.07295912206172943,
"signal/frontier_coverage_20/group_std_mean": 0.09612232595682144,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010433154529891908,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010433154529891908,
"signal/frontier_coverage_25/centered_abs_mean": 0.13768279552459717,
"signal/frontier_coverage_25/group_std_mean": 0.18044605255126953,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001968864002265036,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001968864002265036,
"signal/frontier_coverage_5/centered_abs_mean": 0.12782656103372575,
"signal/frontier_coverage_5/group_std_mean": 0.17402253448963165,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018279198091477155,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018279198091477155,
"step": 195
},
{
"calibration/aurc": 0.17856991685454757,
"calibration/batch_distribution_entropy": 0.7682419787958594,
"calibration/buffer_distribution_entropy": 0.8208414222154016,
"calibration/confidence_entropy": 0.3823009379147786,
"calibration/coverage@0%": 0.012575064242624123,
"calibration/coverage@1%": 0.012575064242624123,
"calibration/coverage@10%": 0.29729537286758967,
"calibration/coverage@15%": 0.43410561899657835,
"calibration/coverage@20%": 0.6063885338939963,
"calibration/coverage@25%": 0.8951074475065617,
"calibration/coverage@30%": 0.9317585301837269,
"calibration/coverage@5%": 0.17731190634788727,
"calibration/ece": 0.141153362219527,
"calibration/mean_confidence": 0.7661149606034827,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00859375,
"completions/max_length": 3273.2,
"completions/max_terminated_length": 3273.2,
"completions/mean_length": 702.9766479492188,
"completions/mean_terminated_length": 709.0731323242187,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.6,
"epoch": 0.47999400007499904,
"grad_norm": 0.0004507655103225261,
"learning_rate": 2.409638554216868e-07,
"loss": -0.006,
"num_tokens": 439445927.0,
"reward": 1.0263540983200072,
"reward_std": 0.12140857428312302,
"rewards/accuracy_reward": 0.6982638835906982,
"rewards/brier_reward": 0.8136134743690491,
"rewards/confidence_uniqueness_reward": 0.9341794133186341,
"rewards/format_reward": 0.99140625,
"rewards/frontier_coverage_0": 0.032625177130103114,
"rewards/frontier_coverage_1": 0.032625177130103114,
"rewards/frontier_coverage_10": 0.031621862575411795,
"rewards/frontier_coverage_15": 0.029490308091044425,
"rewards/frontier_coverage_20": 0.0741084560751915,
"rewards/frontier_coverage_25": 0.23827401399612427,
"rewards/frontier_coverage_5": 0.032565965130925176,
"signal/accuracy_reward/centered_abs_mean": 0.14779730439186095,
"signal/accuracy_reward/group_std_mean": 0.19302791357040405,
"signal/accuracy_reward/group_zero_std_frac": 0.45555556416511533,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07389865219593048,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07389865219593048,
"signal/advantage_abs_mean": 0.08865651488304138,
"signal/advantage_pre_scale_abs_mean": 0.08865651488304138,
"signal/advantage_pre_scale_std": 0.16045403480529785,
"signal/advantage_std": 0.16045403480529785,
"signal/brier_reward/centered_abs_mean": 0.1274328500032425,
"signal/brier_reward/group_std_mean": 0.1681652307510376,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012743284739553929,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012743284739553929,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03525126874446869,
"signal/confidence_uniqueness_reward/group_std_mean": 0.057352755963802335,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035251271445304157,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035251271445304157,
"signal/format_reward/centered_abs_mean": 0.015196397714316846,
"signal/format_reward/group_std_mean": 0.033493170887231825,
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007598198857158423,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007598198857158423,
"signal/frontier_coverage_0/centered_abs_mean": 0.10695488750934601,
"signal/frontier_coverage_0/group_std_mean": 0.15166882872581483,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015294548822566867,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015294548822566867,
"signal/frontier_coverage_1/centered_abs_mean": 0.10695488750934601,
"signal/frontier_coverage_1/group_std_mean": 0.15166882872581483,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015294548822566867,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015294548822566867,
"signal/frontier_coverage_10/centered_abs_mean": 0.1000509575009346,
"signal/frontier_coverage_10/group_std_mean": 0.14240919947624206,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014307287288829683,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014307287288829683,
"signal/frontier_coverage_15/centered_abs_mean": 0.08026445358991623,
"signal/frontier_coverage_15/group_std_mean": 0.11479192227125168,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011477816849946975,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011477816849946975,
"signal/frontier_coverage_20/centered_abs_mean": 0.07273156195878983,
"signal/frontier_coverage_20/group_std_mean": 0.09654622375965119,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010400613187812268,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010400613187812268,
"signal/frontier_coverage_25/centered_abs_mean": 0.14482411444187165,
"signal/frontier_coverage_25/group_std_mean": 0.18901716768741608,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020709848264232277,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020709848264232277,
"signal/frontier_coverage_5/centered_abs_mean": 0.10635966509580612,
"signal/frontier_coverage_5/group_std_mean": 0.15090845227241517,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015209432225674392,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015209432225674392,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_calibration/aurc": 0.15612408573344327,
"eval_calibration/batch_distribution_entropy": 0.7397058913988753,
"eval_calibration/buffer_distribution_entropy": 0.8111514149268445,
"eval_calibration/confidence_entropy": 0.3610454566922106,
"eval_calibration/coverage@0%": 0.09895833333333333,
"eval_calibration/coverage@1%": 0.09895833333333333,
"eval_calibration/coverage@10%": 0.4114583333333333,
"eval_calibration/coverage@15%": 0.6510416666666666,
"eval_calibration/coverage@20%": 0.7864583333333334,
"eval_calibration/coverage@25%": 0.9427083333333334,
"eval_calibration/coverage@30%": 0.9895833333333334,
"eval_calibration/coverage@5%": 0.20833333333333334,
"eval_calibration/ece": 0.164064940040337,
"eval_calibration/mean_confidence": 0.7508913785324004,
"eval_completions/clipped_ratio": 0.006076388888888877,
"eval_completions/max_length": 2743.3333333333335,
"eval_completions/max_terminated_length": 2743.3333333333335,
"eval_completions/mean_length": 718.5382080078125,
"eval_completions/mean_terminated_length": 722.8815104166666,
"eval_completions/min_length": 98.83333333333333,
"eval_completions/min_terminated_length": 235.33333333333334,
"eval_loss": 0.0,
"eval_num_tokens": 439445927.0,
"eval_reward": 1.012143760919571,
"eval_reward_std": 0.2549586296081543,
"eval_rewards/accuracy_reward": 0.6822916666666666,
"eval_rewards/brier_reward": 0.8020952641963959,
"eval_rewards/confidence_uniqueness_reward": 0.884552131096522,
"eval_rewards/format_reward": 0.9921874900658926,
"eval_rewards/frontier_coverage_0": 0.03533175913617015,
"eval_rewards/frontier_coverage_1": 0.03533175913617015,
"eval_rewards/frontier_coverage_10": 0.035481404474315546,
"eval_rewards/frontier_coverage_15": 0.03456710961957773,
"eval_rewards/frontier_coverage_20": 0.06474988100429376,
"eval_rewards/frontier_coverage_25": 0.19553381452957788,
"eval_rewards/frontier_coverage_5": 0.035325445909014284,
"eval_runtime": 205.202,
"eval_samples_per_second": 4.873,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4191623230775197,
"eval_signal/accuracy_reward/group_std_mean": 0.4642222821712494,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20958116153875986,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20958116153875986,
"eval_signal/advantage_abs_mean": 0.22168858846028647,
"eval_signal/advantage_pre_scale_abs_mean": 0.22168858846028647,
"eval_signal/advantage_pre_scale_std": 0.2531501104434331,
"eval_signal/advantage_std": 0.2531501104434331,
"eval_signal/brier_reward/centered_abs_mean": 0.22948966672023138,
"eval_signal/brier_reward/group_std_mean": 0.29068108399709064,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022948966672023136,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022948966672023136,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.057275036349892616,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08676877121130626,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00572750383677582,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00572750383677582,
"eval_signal/format_reward/centered_abs_mean": 0.015136718284338713,
"eval_signal/format_reward/group_std_mean": 0.044194173999130726,
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359142169356,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359142169356,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.1817739779750506,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3032199541727702,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025993677166601024,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025993677166601024,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1817739779750506,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3032199541727702,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025993677166601024,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025993677166601024,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.17377296338478723,
"eval_signal/frontier_coverage_10/group_std_mean": 0.2915558119614919,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002484953341384729,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002484953341384729,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.13976867124438286,
"eval_signal/frontier_coverage_15/group_std_mean": 0.2382419357697169,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019986919360235333,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019986919360235333,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.11765346676111221,
"eval_signal/frontier_coverage_20/group_std_mean": 0.16934698323408762,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016824445920065045,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016824445920065045,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.27798762917518616,
"eval_signal/frontier_coverage_25/group_std_mean": 0.33808498084545135,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00397522277974834,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00397522277974834,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.18090522040923437,
"eval_signal/frontier_coverage_5/group_std_mean": 0.30195263028144836,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025869448048373065,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025869448048373065,
"eval_steps_per_second": 0.029,
"step": 200
},
{
"calibration/aurc": 0.20067151651571807,
"calibration/batch_distribution_entropy": 0.7387570320570516,
"calibration/buffer_distribution_entropy": 0.8004499003116875,
"calibration/confidence_entropy": 0.3662937348410689,
"calibration/coverage@0%": 0.02244764397905759,
"calibration/coverage@1%": 0.02244764397905759,
"calibration/coverage@10%": 0.1557400741710297,
"calibration/coverage@15%": 0.376529777486911,
"calibration/coverage@20%": 0.47280793607054294,
"calibration/coverage@25%": 0.8266519702397355,
"calibration/coverage@30%": 0.9607329842931938,
"calibration/coverage@5%": 0.03338514397905759,
"calibration/ece": 0.13317514525521598,
"calibration/mean_confidence": 0.7690856718981816,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008767361111111116,
"completions/max_length": 3502.2,
"completions/max_terminated_length": 3502.2,
"completions/mean_length": 717.9185913085937,
"completions/mean_terminated_length": 724.2613647460937,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.8,
"epoch": 0.491993850076874,
"grad_norm": 0.00037455017445608974,
"learning_rate": 9.036144578313253e-08,
"loss": -0.0073,
"num_tokens": 450782301.0,
"reward": 1.0498441219329835,
"reward_std": 0.11888199001550674,
"rewards/accuracy_reward": 0.7449652791023255,
"rewards/brier_reward": 0.8252813696861268,
"rewards/confidence_uniqueness_reward": 0.9361446976661683,
"rewards/format_reward": 0.9912326455116272,
"rewards/frontier_coverage_0": 0.011065579298883677,
"rewards/frontier_coverage_1": 0.011065579298883677,
"rewards/frontier_coverage_10": 0.013013468869030476,
"rewards/frontier_coverage_15": 0.01967374011874199,
"rewards/frontier_coverage_20": 0.07497628033161163,
"rewards/frontier_coverage_25": 0.25076726377010344,
"rewards/frontier_coverage_5": 0.011220467463135719,
"signal/accuracy_reward/centered_abs_mean": 0.14809027910232545,
"signal/accuracy_reward/group_std_mean": 0.20212058126926422,
"signal/accuracy_reward/group_zero_std_frac": 0.402777773141861,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07404513955116272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07404513955116272,
"signal/advantage_abs_mean": 0.0827422708272934,
"signal/advantage_pre_scale_abs_mean": 0.0827422708272934,
"signal/advantage_pre_scale_std": 0.1549478828907013,
"signal/advantage_std": 0.1549478828907013,
"signal/brier_reward/centered_abs_mean": 0.12346100956201553,
"signal/brier_reward/group_std_mean": 0.16328605115413666,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012346101738512517,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012346101738512517,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0347956083714962,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05594836547970772,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034795609302818776,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034795609302818776,
"signal/format_reward/centered_abs_mean": 0.015869140625,
"signal/format_reward/group_std_mean": 0.03326698914170265,
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0079345703125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0079345703125,
"signal/frontier_coverage_0/centered_abs_mean": 0.12408159524202347,
"signal/frontier_coverage_0/group_std_mean": 0.16999780237674714,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017743667354807257,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017743667354807257,
"signal/frontier_coverage_1/centered_abs_mean": 0.12408159524202347,
"signal/frontier_coverage_1/group_std_mean": 0.16999780237674714,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017743667354807257,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017743667354807257,
"signal/frontier_coverage_10/centered_abs_mean": 0.1159680426120758,
"signal/frontier_coverage_10/group_std_mean": 0.15904043912887572,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016583429649472236,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016583429649472236,
"signal/frontier_coverage_15/centered_abs_mean": 0.09267009794712067,
"signal/frontier_coverage_15/group_std_mean": 0.12770854830741882,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013251824537292122,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013251824537292122,
"signal/frontier_coverage_20/centered_abs_mean": 0.06762583926320076,
"signal/frontier_coverage_20/group_std_mean": 0.09054560959339142,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009670495055615902,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009670495055615902,
"signal/frontier_coverage_25/centered_abs_mean": 0.12514513731002808,
"signal/frontier_coverage_25/group_std_mean": 0.16600916385650635,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017895755358040334,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017895755358040334,
"signal/frontier_coverage_5/centered_abs_mean": 0.12371799796819687,
"signal/frontier_coverage_5/group_std_mean": 0.16952943801879883,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001769167324528098,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001769167324528098,
"step": 205
},
{
"calibration/aurc": 0.09258411805079743,
"calibration/batch_distribution_entropy": 0.7909286874200081,
"calibration/buffer_distribution_entropy": 0.7829421573350821,
"calibration/confidence_entropy": 0.380171748559697,
"calibration/coverage@0%": 0.07280701754385965,
"calibration/coverage@1%": 0.10964912280701755,
"calibration/coverage@10%": 0.6285096107805916,
"calibration/coverage@15%": 0.7868216911868257,
"calibration/coverage@20%": 0.9004364963028112,
"calibration/coverage@25%": 0.9665205474352353,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.4454574210372016,
"calibration/ece": 0.06343821279388381,
"calibration/mean_confidence": 0.7582262227842415,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00853587962962965,
"completions/max_length": 2820.3333333333335,
"completions/max_terminated_length": 2820.3333333333335,
"completions/mean_length": 715.6140340169271,
"completions/mean_terminated_length": 721.9536946614584,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.33333333333334,
"epoch": 0.49919376007799904,
"num_tokens": 457594673.0,
"reward": 1.0277764797210693,
"reward_std": 0.12234559903542201,
"rewards/accuracy_reward": 0.7025462985038757,
"rewards/brier_reward": 0.8049926360448202,
"rewards/confidence_uniqueness_reward": 0.932228704293569,
"rewards/format_reward": 0.9914641181627909,
"rewards/frontier_coverage_0": 0.023547140260537464,
"rewards/frontier_coverage_1": 0.023547140260537464,
"rewards/frontier_coverage_10": 0.023169512239595253,
"rewards/frontier_coverage_15": 0.028680586876968544,
"rewards/frontier_coverage_20": 0.09770172089338303,
"rewards/frontier_coverage_25": 0.2726644178231557,
"rewards/frontier_coverage_5": 0.02363560472925504,
"signal/accuracy_reward/centered_abs_mean": 0.15646701554457346,
"signal/accuracy_reward/group_std_mean": 0.20409129559993744,
"signal/accuracy_reward/group_zero_std_frac": 0.4305555721124013,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07823350777228673,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07823350777228673,
"signal/advantage_abs_mean": 0.0893336609005928,
"signal/advantage_pre_scale_abs_mean": 0.0893336609005928,
"signal/advantage_pre_scale_std": 0.1595475971698761,
"signal/advantage_std": 0.1595475971698761,
"signal/brier_reward/centered_abs_mean": 0.13335002462069193,
"signal/brier_reward/group_std_mean": 0.177884042263031,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013335002275804678,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013335002275804678,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03668076234559218,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05635303258895874,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036680761259049177,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036680761259049177,
"signal/format_reward/centered_abs_mean": 0.0151457612713178,
"signal/format_reward/group_std_mean": 0.030251561353604,
"signal/format_reward/group_zero_std_frac": 0.8703703681627909,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0075728806356589,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0075728806356589,
"signal/frontier_coverage_0/centered_abs_mean": 0.12804659952720007,
"signal/frontier_coverage_0/group_std_mean": 0.174424409866333,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018310664454475045,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018310664454475045,
"signal/frontier_coverage_1/centered_abs_mean": 0.12804659952720007,
"signal/frontier_coverage_1/group_std_mean": 0.174424409866333,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018310664454475045,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018310664454475045,
"signal/frontier_coverage_10/centered_abs_mean": 0.11198657502730687,
"signal/frontier_coverage_10/group_std_mean": 0.15388049681981406,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016014080417032044,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016014080417032044,
"signal/frontier_coverage_15/centered_abs_mean": 0.08891634891430537,
"signal/frontier_coverage_15/group_std_mean": 0.12195203453302383,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012715038610622287,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012715038610622287,
"signal/frontier_coverage_20/centered_abs_mean": 0.07647461940844853,
"signal/frontier_coverage_20/group_std_mean": 0.10173061241706212,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010935871008162696,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010935871008162696,
"signal/frontier_coverage_25/centered_abs_mean": 0.1526160587867101,
"signal/frontier_coverage_25/group_std_mean": 0.2031193325916926,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002182409632951021,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002182409632951021,
"signal/frontier_coverage_5/centered_abs_mean": 0.12730942914883295,
"signal/frontier_coverage_5/group_std_mean": 0.17352166771888733,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001820524805225432,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001820524805225432,
"step": 208,
"total_flos": 0.0,
"train_loss": -0.00772521308625493,
"train_runtime": 40973.9456,
"train_samples_per_second": 0.366,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 208,
"num_input_tokens_seen": 457594673,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}