5402 lines
333 KiB
JSON
5402 lines
333 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 0.49919376007799904,
|
||
|
|
"eval_steps": 50,
|
||
|
|
"global_step": 208,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.49858621709600043,
|
||
|
|
"calibration/batch_distribution_entropy": 0.27179949345286947,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.2826936735263452,
|
||
|
|
"calibration/confidence_entropy": 0.22057572312827042,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.46227961186189803,
|
||
|
|
"calibration/mean_confidence": 0.9144884743892769,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.02005208333333335,
|
||
|
|
"completions/max_length": 3998.4,
|
||
|
|
"completions/max_terminated_length": 3998.4,
|
||
|
|
"completions/mean_length": 516.7477416992188,
|
||
|
|
"completions/mean_terminated_length": 527.3296142578125,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 2.0,
|
||
|
|
"epoch": 0.011999850001874977,
|
||
|
|
"grad_norm": 0.003858975600451231,
|
||
|
|
"learning_rate": 5.952380952380953e-07,
|
||
|
|
"loss": 0.0068,
|
||
|
|
"num_tokens": 9067142.0,
|
||
|
|
"reward": 0.5035168766975403,
|
||
|
|
"reward_std": 0.44194251894950864,
|
||
|
|
"rewards/accuracy_reward": 0.25572916567325593,
|
||
|
|
"rewards/brier_reward": 0.3094047784805298,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.28810680508613584,
|
||
|
|
"rewards/format_reward": 0.5986111044883728,
|
||
|
|
"rewards/frontier_coverage_0": 0.16578977052122354,
|
||
|
|
"rewards/frontier_coverage_1": 0.16578977052122354,
|
||
|
|
"rewards/frontier_coverage_10": 0.16578977052122354,
|
||
|
|
"rewards/frontier_coverage_15": 0.16578977052122354,
|
||
|
|
"rewards/frontier_coverage_20": 0.16578977052122354,
|
||
|
|
"rewards/frontier_coverage_25": 0.16578977052122354,
|
||
|
|
"rewards/frontier_coverage_5": 0.16578977052122354,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3037217855453491,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.3625373482704163,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.10555555745959282,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15186089277267456,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15186089277267456,
|
||
|
|
"signal/advantage_abs_mean": 0.383181232213974,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.383181232213974,
|
||
|
|
"signal/advantage_pre_scale_std": 0.4454788088798523,
|
||
|
|
"signal/advantage_std": 0.4454788088798523,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.31622138023376467,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.36951943635940554,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03162213787436485,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03162213787436485,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.237173992395401,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2888317406177521,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023717399314045907,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023717399314045907,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.43920356035232544,
|
||
|
|
"signal/format_reward/group_std_mean": 0.4745051324367523,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21960178017616272,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.21960178017616272,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19068164750933647,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23141059912741185,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19068164750933647,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23141059912741185,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19068164750933647,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23141059912741185,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19068164750933647,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23141059912741185,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19068164750933647,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23141059912741185,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19068164750933647,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23141059912741185,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19068164750933647,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23141059912741185,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027267476194538175,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.5048620587670756,
|
||
|
|
"calibration/batch_distribution_entropy": 0.23936490089336626,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.276244326153706,
|
||
|
|
"calibration/confidence_entropy": 0.21518765364765557,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.4767879771080269,
|
||
|
|
"calibration/mean_confidence": 0.9237536204358829,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.01918402777777779,
|
||
|
|
"completions/max_length": 3910.2,
|
||
|
|
"completions/max_terminated_length": 3910.2,
|
||
|
|
"completions/mean_length": 477.1962585449219,
|
||
|
|
"completions/mean_terminated_length": 486.66339721679685,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 21.2,
|
||
|
|
"epoch": 0.023999700003749954,
|
||
|
|
"grad_norm": 0.03838086128234863,
|
||
|
|
"learning_rate": 1.1904761904761906e-06,
|
||
|
|
"loss": 0.0025,
|
||
|
|
"num_tokens": 17647163.0,
|
||
|
|
"reward": 0.5732499718666076,
|
||
|
|
"reward_std": 0.39466784000396726,
|
||
|
|
"rewards/accuracy_reward": 0.290625,
|
||
|
|
"rewards/brier_reward": 0.35456337332725524,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.35486308932304383,
|
||
|
|
"rewards/format_reward": 0.7128472208976746,
|
||
|
|
"rewards/frontier_coverage_0": 0.00570630207657814,
|
||
|
|
"rewards/frontier_coverage_1": 0.00570630207657814,
|
||
|
|
"rewards/frontier_coverage_10": 0.00570630207657814,
|
||
|
|
"rewards/frontier_coverage_15": 0.00570630207657814,
|
||
|
|
"rewards/frontier_coverage_20": 0.00570630207657814,
|
||
|
|
"rewards/frontier_coverage_25": 0.00570630207657814,
|
||
|
|
"rewards/frontier_coverage_5": 0.00570630207657814,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.32489149570465087,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.3819944679737091,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16244574785232543,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16244574785232543,
|
||
|
|
"signal/advantage_abs_mean": 0.32642056941986086,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.32642056941986086,
|
||
|
|
"signal/advantage_pre_scale_std": 0.3967562675476074,
|
||
|
|
"signal/advantage_std": 0.3967562675476074,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.32057392597198486,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.3732548654079437,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032057393342256546,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.032057393342256546,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22309686243534088,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2798730194568634,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022309686988592148,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022309686988592148,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.3567274272441864,
|
||
|
|
"signal/format_reward/group_std_mean": 0.42118590474128725,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.00555555559694767,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1783637136220932,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1783637136220932,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.015685518644750117,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.0333976186811924,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.015685518644750117,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0333976186811924,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.015685518644750117,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0333976186811924,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.015685518644750117,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0333976186811924,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.015685518644750117,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0333976186811924,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.015685518644750117,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0333976186811924,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.015685518644750117,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0333976186811924,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00022430291573982686,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.5651461697219853,
|
||
|
|
"calibration/batch_distribution_entropy": 0.2858721407601498,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.26435241204989707,
|
||
|
|
"calibration/confidence_entropy": 0.23515916341087234,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.5243550099379826,
|
||
|
|
"calibration/mean_confidence": 0.9145028922999036,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.011197916666666674,
|
||
|
|
"completions/max_length": 3814.8,
|
||
|
|
"completions/max_terminated_length": 3814.8,
|
||
|
|
"completions/mean_length": 415.96303100585936,
|
||
|
|
"completions/mean_terminated_length": 420.7157958984375,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 60.6,
|
||
|
|
"epoch": 0.03599955000562493,
|
||
|
|
"grad_norm": 0.001462470623664558,
|
||
|
|
"learning_rate": 1.7857142857142859e-06,
|
||
|
|
"loss": -0.0064,
|
||
|
|
"num_tokens": 25541041.0,
|
||
|
|
"reward": 0.7139440774917603,
|
||
|
|
"reward_std": 0.2866878867149353,
|
||
|
|
"rewards/accuracy_reward": 0.3111111164093018,
|
||
|
|
"rewards/brier_reward": 0.4146228313446045,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.5058956265449523,
|
||
|
|
"rewards/format_reward": 0.9306423544883728,
|
||
|
|
"rewards/frontier_coverage_0": 0.010144511703401804,
|
||
|
|
"rewards/frontier_coverage_1": 0.010144511703401804,
|
||
|
|
"rewards/frontier_coverage_10": 0.010144511703401804,
|
||
|
|
"rewards/frontier_coverage_15": 0.010144511703401804,
|
||
|
|
"rewards/frontier_coverage_20": 0.010144511703401804,
|
||
|
|
"rewards/frontier_coverage_25": 0.010144511703401804,
|
||
|
|
"rewards/frontier_coverage_5": 0.010144511703401804,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3240451455116272,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.3799292385578156,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.09444444701075554,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1620225727558136,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1620225727558136,
|
||
|
|
"signal/advantage_abs_mean": 0.22506832480430602,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.22506832480430602,
|
||
|
|
"signal/advantage_pre_scale_std": 0.29292616844177244,
|
||
|
|
"signal/advantage_std": 0.29292616844177244,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.30433117747306826,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.3543997764587402,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030433119088411332,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.030433119088411332,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17291399836540222,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.22697044014930726,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01729139983654022,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01729139983654022,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.11800672635436057,
|
||
|
|
"signal/format_reward/group_std_mean": 0.20241138935089112,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.2583333317190409,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05900336317718029,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.05900336317718029,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.021325640380382538,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.04250783696770668,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.021325640380382538,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.04250783696770668,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.021325640380382538,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.04250783696770668,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.021325640380382538,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.04250783696770668,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.021325640380382538,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04250783696770668,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.021325640380382538,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04250783696770668,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.021325640380382538,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.04250783696770668,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00030495663813780995,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.4942660369668168,
|
||
|
|
"calibration/batch_distribution_entropy": 0.36107236461197073,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.2866057794747389,
|
||
|
|
"calibration/confidence_entropy": 0.28890118485400956,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.03717277486910995,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.4425994634048266,
|
||
|
|
"calibration/mean_confidence": 0.8945259513282633,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.008767361111111116,
|
||
|
|
"completions/max_length": 3545.2,
|
||
|
|
"completions/max_terminated_length": 3545.2,
|
||
|
|
"completions/mean_length": 423.8654479980469,
|
||
|
|
"completions/mean_terminated_length": 427.660546875,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 68.8,
|
||
|
|
"epoch": 0.04799940000749991,
|
||
|
|
"grad_norm": 0.0008591993246227503,
|
||
|
|
"learning_rate": 2.380952380952381e-06,
|
||
|
|
"loss": -0.0082,
|
||
|
|
"num_tokens": 33537667.0,
|
||
|
|
"reward": 0.8044180393218994,
|
||
|
|
"reward_std": 0.2287308543920517,
|
||
|
|
"rewards/accuracy_reward": 0.4008680522441864,
|
||
|
|
"rewards/brier_reward": 0.5150173962116241,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.5862200736999512,
|
||
|
|
"rewards/format_reward": 0.9855034828186036,
|
||
|
|
"rewards/frontier_coverage_0": 0.011074092797935009,
|
||
|
|
"rewards/frontier_coverage_1": 0.011074092797935009,
|
||
|
|
"rewards/frontier_coverage_10": 0.011074092797935009,
|
||
|
|
"rewards/frontier_coverage_15": 0.011074092797935009,
|
||
|
|
"rewards/frontier_coverage_20": 0.011074092797935009,
|
||
|
|
"rewards/frontier_coverage_25": 0.011074092797935009,
|
||
|
|
"rewards/frontier_coverage_5": 0.011074092797935009,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2977321982383728,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.3633489072322845,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.08333333432674409,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1488660991191864,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1488660991191864,
|
||
|
|
"signal/advantage_abs_mean": 0.18184916377067567,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.18184916377067567,
|
||
|
|
"signal/advantage_pre_scale_std": 0.23659501671791078,
|
||
|
|
"signal/advantage_std": 0.23659501671791078,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.26825318336486814,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.32477903366088867,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02682531885802746,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02682531885802746,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.16254269182682038,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1992730051279068,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01625426858663559,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01625426858663559,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.026285807229578496,
|
||
|
|
"signal/format_reward/group_std_mean": 0.05511143393814564,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.7555555462837219,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013142903614789248,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013142903614789248,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.02683491036295891,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.04888941571116447,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.02683491036295891,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.04888941571116447,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.02683491036295891,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.04888941571116447,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.02683491036295891,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.04888941571116447,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.02683491036295891,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04888941571116447,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.02683491036295891,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04888941571116447,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.02683491036295891,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.04888941571116447,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0003837391850538552,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.39448058005388165,
|
||
|
|
"calibration/batch_distribution_entropy": 0.4518564097762634,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.3448626963299991,
|
||
|
|
"calibration/confidence_entropy": 0.3213623682330117,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.016753926701570682,
|
||
|
|
"calibration/coverage@20%": 0.019895287958115182,
|
||
|
|
"calibration/coverage@25%": 0.12198952879581151,
|
||
|
|
"calibration/coverage@30%": 0.18481675392670155,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.3243279868719355,
|
||
|
|
"calibration/mean_confidence": 0.8755994903434999,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.007204861111111116,
|
||
|
|
"completions/max_length": 3844.8,
|
||
|
|
"completions/max_terminated_length": 3844.8,
|
||
|
|
"completions/mean_length": 468.8833435058594,
|
||
|
|
"completions/mean_terminated_length": 472.284619140625,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 99.2,
|
||
|
|
"epoch": 0.05999925000937488,
|
||
|
|
"grad_norm": 0.0007930905558168888,
|
||
|
|
"learning_rate": 2.9761904761904763e-06,
|
||
|
|
"loss": -0.0046,
|
||
|
|
"num_tokens": 42063651.0,
|
||
|
|
"reward": 0.8718119025230407,
|
||
|
|
"reward_std": 0.2136448562145233,
|
||
|
|
"rewards/accuracy_reward": 0.4987847149372101,
|
||
|
|
"rewards/brier_reward": 0.6132471799850464,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.6505587697029114,
|
||
|
|
"rewards/format_reward": 0.9907118201255798,
|
||
|
|
"rewards/frontier_coverage_0": 0.00682337733451277,
|
||
|
|
"rewards/frontier_coverage_1": 0.00682337733451277,
|
||
|
|
"rewards/frontier_coverage_10": 0.00682337733451277,
|
||
|
|
"rewards/frontier_coverage_15": 0.00682337733451277,
|
||
|
|
"rewards/frontier_coverage_20": 0.00682337733451277,
|
||
|
|
"rewards/frontier_coverage_25": 0.00682337733451277,
|
||
|
|
"rewards/frontier_coverage_5": 0.00682337733451277,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.28725042939186096,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.35440042018890383,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.08888889104127884,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14362521469593048,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14362521469593048,
|
||
|
|
"signal/advantage_abs_mean": 0.16881968677043915,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.16881968677043915,
|
||
|
|
"signal/advantage_pre_scale_std": 0.22591695785522461,
|
||
|
|
"signal/advantage_std": 0.22591695785522461,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.23450190126895903,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.28934155106544496,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02345018908381462,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02345018908381462,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1386233687400818,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16758487224578858,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013862336613237857,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013862336613237857,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.017116970755159854,
|
||
|
|
"signal/format_reward/group_std_mean": 0.038513346761465075,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008558485377579927,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008558485377579927,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.030816724896430968,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.05202222615480423,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.030816724896430968,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.05202222615480423,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.030816724896430968,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.05202222615480423,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.030816724896430968,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.05202222615480423,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.030816724896430968,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05202222615480423,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.030816724896430968,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.05202222615480423,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.030816724896430968,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.05202222615480423,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00044067916460335257,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.28599355007968147,
|
||
|
|
"calibration/batch_distribution_entropy": 0.5729950289638938,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.4415618909140395,
|
||
|
|
"calibration/confidence_entropy": 0.39785220307742414,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.013123359580052493,
|
||
|
|
"calibration/coverage@15%": 0.020935859580052493,
|
||
|
|
"calibration/coverage@20%": 0.16300624246293538,
|
||
|
|
"calibration/coverage@25%": 0.3446692071008016,
|
||
|
|
"calibration/coverage@30%": 0.5385029855643044,
|
||
|
|
"calibration/coverage@5%": 0.013123359580052493,
|
||
|
|
"calibration/ece": 0.18942492818507478,
|
||
|
|
"calibration/mean_confidence": 0.8361121732549478,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.013020833333333325,
|
||
|
|
"completions/max_length": 3944.4,
|
||
|
|
"completions/max_terminated_length": 3944.4,
|
||
|
|
"completions/mean_length": 559.3470703125,
|
||
|
|
"completions/mean_terminated_length": 566.7720703125,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 122.8,
|
||
|
|
"epoch": 0.07199910001124986,
|
||
|
|
"grad_norm": 0.0005365905235521495,
|
||
|
|
"learning_rate": 3.5714285714285718e-06,
|
||
|
|
"loss": -0.0078,
|
||
|
|
"num_tokens": 51617249.0,
|
||
|
|
"reward": 0.9077984690666199,
|
||
|
|
"reward_std": 0.19471972584724426,
|
||
|
|
"rewards/accuracy_reward": 0.56171875,
|
||
|
|
"rewards/brier_reward": 0.6795339226722718,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.6588276028633118,
|
||
|
|
"rewards/format_reward": 0.9855034708976745,
|
||
|
|
"rewards/frontier_coverage_0": 0.003508235071785748,
|
||
|
|
"rewards/frontier_coverage_1": 0.003508235071785748,
|
||
|
|
"rewards/frontier_coverage_10": 0.003508235071785748,
|
||
|
|
"rewards/frontier_coverage_15": 0.003508235071785748,
|
||
|
|
"rewards/frontier_coverage_20": 0.003508235071785748,
|
||
|
|
"rewards/frontier_coverage_25": 0.003508235071785748,
|
||
|
|
"rewards/frontier_coverage_5": 0.003508235071785748,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.24979926645755768,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.31267011165618896,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.17500000447034836,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12489963322877884,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12489963322877884,
|
||
|
|
"signal/advantage_abs_mean": 0.15053375214338302,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.15053375214338302,
|
||
|
|
"signal/advantage_pre_scale_std": 0.21564119458198547,
|
||
|
|
"signal/advantage_std": 0.21564119458198547,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1923845499753952,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.24107061624526976,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019238455034792424,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019238455034792424,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15954148322343825,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.19074728488922119,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01595414914190769,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01595414914190769,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02523328997194767,
|
||
|
|
"signal/format_reward/group_std_mean": 0.05074257925152779,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.7777777910232544,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012616644985973835,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012616644985973835,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.036959283798933026,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.057681336998939514,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.036959283798933026,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.057681336998939514,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.036959283798933026,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.057681336998939514,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.036959283798933026,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.057681336998939514,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.036959283798933026,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.057681336998939514,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.036959283798933026,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.057681336998939514,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.036959283798933026,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.057681336998939514,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005285177612677217,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2632535671771489,
|
||
|
|
"calibration/batch_distribution_entropy": 0.6466923551545329,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.5421351864917888,
|
||
|
|
"calibration/confidence_entropy": 0.44397516911608326,
|
||
|
|
"calibration/coverage@0%": 0.015845758641871894,
|
||
|
|
"calibration/coverage@1%": 0.015845758641871894,
|
||
|
|
"calibration/coverage@10%": 0.06649707894187398,
|
||
|
|
"calibration/coverage@15%": 0.1285361788638969,
|
||
|
|
"calibration/coverage@20%": 0.16434714020955804,
|
||
|
|
"calibration/coverage@25%": 0.38898227624249804,
|
||
|
|
"calibration/coverage@30%": 0.8153820641936578,
|
||
|
|
"calibration/coverage@5%": 0.015845758641871894,
|
||
|
|
"calibration/ece": 0.14365905416536864,
|
||
|
|
"calibration/mean_confidence": 0.8021350863697474,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.015364583333333348,
|
||
|
|
"completions/max_length": 4040.8,
|
||
|
|
"completions/max_terminated_length": 4040.8,
|
||
|
|
"completions/mean_length": 634.3866333007812,
|
||
|
|
"completions/mean_terminated_length": 644.3625610351562,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 179.6,
|
||
|
|
"epoch": 0.08399895001312484,
|
||
|
|
"grad_norm": 0.0005198422586545348,
|
||
|
|
"learning_rate": 4.166666666666667e-06,
|
||
|
|
"loss": -0.0098,
|
||
|
|
"num_tokens": 62002823.0,
|
||
|
|
"reward": 0.9421573758125306,
|
||
|
|
"reward_std": 0.17538723051548005,
|
||
|
|
"rewards/accuracy_reward": 0.6183159828186036,
|
||
|
|
"rewards/brier_reward": 0.7280090808868408,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.6922753095626831,
|
||
|
|
"rewards/format_reward": 0.9828993201255798,
|
||
|
|
"rewards/frontier_coverage_0": -0.004782191128470004,
|
||
|
|
"rewards/frontier_coverage_1": -0.004782191128470004,
|
||
|
|
"rewards/frontier_coverage_10": -0.004782191128470004,
|
||
|
|
"rewards/frontier_coverage_15": -0.004782191128470004,
|
||
|
|
"rewards/frontier_coverage_20": -0.004782191128470004,
|
||
|
|
"rewards/frontier_coverage_25": -0.004782191128470004,
|
||
|
|
"rewards/frontier_coverage_5": -0.004782191128470004,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.21829969584941863,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.27772454619407655,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.25,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10914984792470932,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10914984792470932,
|
||
|
|
"signal/advantage_abs_mean": 0.13406893461942673,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.13406893461942673,
|
||
|
|
"signal/advantage_pre_scale_std": 0.20243431627750397,
|
||
|
|
"signal/advantage_std": 0.20243431627750397,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1590863436460495,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.20434601306915284,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01590863484889269,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01590863484889269,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12657882273197174,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.15774886459112167,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01265788208693266,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01265788208693266,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02765299491584301,
|
||
|
|
"signal/format_reward/group_std_mean": 0.04936157241463661,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333492279052,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013826497457921505,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013826497457921505,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.046808502078056334,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.06622445359826087,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.046808502078056334,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.06622445359826087,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.046808502078056334,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06622445359826087,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.046808502078056334,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06622445359826087,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.046808502078056334,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06622445359826087,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.046808502078056334,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06622445359826087,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.046808502078056334,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.06622445359826087,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0006693615694530308,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2676519777027838,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7054325857041888,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.6344972967811773,
|
||
|
|
"calibration/confidence_entropy": 0.4799178907927473,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0599751997795536,
|
||
|
|
"calibration/coverage@20%": 0.28643587561566747,
|
||
|
|
"calibration/coverage@25%": 0.4707250139453013,
|
||
|
|
"calibration/coverage@30%": 0.6129981332198531,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.12413084652207873,
|
||
|
|
"calibration/mean_confidence": 0.7693312154212686,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.013541666666666697,
|
||
|
|
"completions/max_length": 3869.6,
|
||
|
|
"completions/max_terminated_length": 3869.6,
|
||
|
|
"completions/mean_length": 690.2263916015625,
|
||
|
|
"completions/mean_terminated_length": 699.692431640625,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 182.0,
|
||
|
|
"epoch": 0.09599880001499982,
|
||
|
|
"grad_norm": 0.0004507621633820236,
|
||
|
|
"learning_rate": 4.761904761904762e-06,
|
||
|
|
"loss": -0.01,
|
||
|
|
"num_tokens": 73073751.0,
|
||
|
|
"reward": 0.9667992949485779,
|
||
|
|
"reward_std": 0.15924489200115205,
|
||
|
|
"rewards/accuracy_reward": 0.6505208253860474,
|
||
|
|
"rewards/brier_reward": 0.754754900932312,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.7449337363243103,
|
||
|
|
"rewards/format_reward": 0.9853298664093018,
|
||
|
|
"rewards/frontier_coverage_0": -0.011506949504837393,
|
||
|
|
"rewards/frontier_coverage_1": -0.011506949504837393,
|
||
|
|
"rewards/frontier_coverage_10": -0.011506949504837393,
|
||
|
|
"rewards/frontier_coverage_15": -0.011506949504837393,
|
||
|
|
"rewards/frontier_coverage_20": -0.011012806138023735,
|
||
|
|
"rewards/frontier_coverage_25": -0.008019896177574991,
|
||
|
|
"rewards/frontier_coverage_5": -0.011506949504837393,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18986544907093048,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2536569803953171,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2777777761220932,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09493272453546524,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09493272453546524,
|
||
|
|
"signal/advantage_abs_mean": 0.1158403992652893,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.1158403992652893,
|
||
|
|
"signal/advantage_pre_scale_std": 0.18982842862606047,
|
||
|
|
"signal/advantage_std": 0.18982842862606047,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.14530260264873504,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.18963007628917694,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014530261047184467,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014530261047184467,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08863844275474549,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1167033389210701,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008863845001906156,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008863845001906156,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02526584193110466,
|
||
|
|
"signal/format_reward/group_std_mean": 0.04484616741538048,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8277777791023254,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01263292096555233,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01263292096555233,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.06523959785699844,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.08977894186973571,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.06523959785699844,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.08977894186973571,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.06523959785699844,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.08977894186973571,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06523959785699844,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08977894186973571,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06372052878141403,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08785968273878098,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009112035506404937,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009112035506404937,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05406465157866478,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07568821161985398,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007731245132163167,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007731245132163167,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.06523959785699844,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.08977894186973571,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009329262189567089,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.23071652949504246,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7055601707639283,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.6936541419751034,
|
||
|
|
"calibration/confidence_entropy": 0.4729729373365611,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0061111111111111106,
|
||
|
|
"calibration/coverage@15%": 0.06558502555601603,
|
||
|
|
"calibration/coverage@20%": 0.2877821522309711,
|
||
|
|
"calibration/coverage@25%": 0.69798687354932,
|
||
|
|
"calibration/coverage@30%": 0.9863517060367453,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.10290537054204563,
|
||
|
|
"calibration/mean_confidence": 0.7594771358212933,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.014409722222222188,
|
||
|
|
"completions/max_length": 3817.8,
|
||
|
|
"completions/max_terminated_length": 3817.8,
|
||
|
|
"completions/mean_length": 731.4316040039063,
|
||
|
|
"completions/mean_terminated_length": 742.1711059570313,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 232.6,
|
||
|
|
"epoch": 0.1079986500168748,
|
||
|
|
"grad_norm": 0.00046814393135719,
|
||
|
|
"learning_rate": 4.909638554216868e-06,
|
||
|
|
"loss": -0.0112,
|
||
|
|
"num_tokens": 84635107.0,
|
||
|
|
"reward": 0.9677613854408265,
|
||
|
|
"reward_std": 0.15975097417831421,
|
||
|
|
"rewards/accuracy_reward": 0.6506076574325561,
|
||
|
|
"rewards/brier_reward": 0.7581860542297363,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.7537668347358704,
|
||
|
|
"rewards/format_reward": 0.984375,
|
||
|
|
"rewards/frontier_coverage_0": -0.011279890162404627,
|
||
|
|
"rewards/frontier_coverage_1": -0.011279890162404627,
|
||
|
|
"rewards/frontier_coverage_10": -0.011279890162404627,
|
||
|
|
"rewards/frontier_coverage_15": -0.011279890162404627,
|
||
|
|
"rewards/frontier_coverage_20": -0.007542286452371627,
|
||
|
|
"rewards/frontier_coverage_25": -0.0007611054461449385,
|
||
|
|
"rewards/frontier_coverage_5": -0.011279890162404627,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19601236879825593,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.25668974220752716,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.27777778506278994,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09800618439912796,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09800618439912796,
|
||
|
|
"signal/advantage_abs_mean": 0.11878292560577393,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.11878292560577393,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1898341953754425,
|
||
|
|
"signal/advantage_std": 0.1898341953754425,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1457345962524414,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.19106005132198334,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014573459327220917,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014573459327220917,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10429143160581589,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.13065478056669236,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010429143160581588,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010429143160581588,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02532552070915699,
|
||
|
|
"signal/format_reward/group_std_mean": 0.04548909664154053,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012662760354578495,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012662760354578495,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.07666564732789993,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.10538152903318405,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07666564732789993,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.10538152903318405,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07666564732789993,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.10538152903318405,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07666564732789993,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10538152903318405,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0640810675919056,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08941369652748107,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000916359294205904,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000916359294205904,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04283785969018936,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.062041699141263965,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006125814048573375,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006125814048573375,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07666564732789993,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.10538152903318405,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001096318766940385,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.39586401271300875,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7615897964351726,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.7270643773427474,
|
||
|
|
"calibration/confidence_entropy": 0.5045923336911955,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.012635389036251105,
|
||
|
|
"calibration/coverage@15%": 0.013696396993810787,
|
||
|
|
"calibration/coverage@20%": 0.023667813000846843,
|
||
|
|
"calibration/coverage@25%": 0.03044551748633497,
|
||
|
|
"calibration/coverage@30%": 0.15728762274949287,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.21160105286348876,
|
||
|
|
"calibration/mean_confidence": 0.7227722715219795,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00737847222222221,
|
||
|
|
"completions/max_length": 3473.0,
|
||
|
|
"completions/max_terminated_length": 3473.0,
|
||
|
|
"completions/mean_length": 759.92587890625,
|
||
|
|
"completions/mean_terminated_length": 765.5557373046875,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 232.0,
|
||
|
|
"epoch": 0.11999850001874976,
|
||
|
|
"grad_norm": 0.0005435345810838044,
|
||
|
|
"learning_rate": 4.759036144578314e-06,
|
||
|
|
"loss": -0.0043,
|
||
|
|
"num_tokens": 96487053.0,
|
||
|
|
"reward": 0.9660153031349182,
|
||
|
|
"reward_std": 0.1462629795074463,
|
||
|
|
"rewards/accuracy_reward": 0.6291666626930237,
|
||
|
|
"rewards/brier_reward": 0.7552559852600098,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.7993631482124328,
|
||
|
|
"rewards/format_reward": 0.9925347208976746,
|
||
|
|
"rewards/frontier_coverage_0": -0.00474322558275162,
|
||
|
|
"rewards/frontier_coverage_1": -0.00474322558275162,
|
||
|
|
"rewards/frontier_coverage_10": -0.00474322558275162,
|
||
|
|
"rewards/frontier_coverage_15": -0.00474322558275162,
|
||
|
|
"rewards/frontier_coverage_20": -0.0008788239560090005,
|
||
|
|
"rewards/frontier_coverage_25": 0.0038009291049093006,
|
||
|
|
"rewards/frontier_coverage_5": -0.00474322558275162,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18924696147441863,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2494819164276123,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.29166667759418485,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09462348073720932,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09462348073720932,
|
||
|
|
"signal/advantage_abs_mean": 0.10812054872512818,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.10812054872512818,
|
||
|
|
"signal/advantage_pre_scale_std": 0.17321833372116088,
|
||
|
|
"signal/advantage_std": 0.17321833372116088,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13809363842010497,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.18017106652259826,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013809364847838878,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013809364847838878,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.079685477912426,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10326177328824997,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00796854794025421,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00796854794025421,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.013270399440079928,
|
||
|
|
"signal/format_reward/group_std_mean": 0.02645639069378376,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006635199720039964,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006635199720039964,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.08481028228998184,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.11866706758737564,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08481028228998184,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11866706758737564,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08481028228998184,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11866706758737564,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08481028228998184,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11866706758737564,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07179783061146736,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10189384371042251,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001026709016878158,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001026709016878158,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.046491443365812304,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06868501603603364,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006648276466876268,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006648276466876268,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08481028228998184,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11866706758737564,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012127869995310903,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11999850001874976,
|
||
|
|
"eval_calibration/aurc": 0.2776295888815277,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.7319698598432759,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.7449058358345398,
|
||
|
|
"eval_calibration/confidence_entropy": 0.5090239030799538,
|
||
|
|
"eval_calibration/coverage@0%": 0.08854166666666667,
|
||
|
|
"eval_calibration/coverage@1%": 0.08854166666666667,
|
||
|
|
"eval_calibration/coverage@10%": 0.171875,
|
||
|
|
"eval_calibration/coverage@15%": 0.2604166666666667,
|
||
|
|
"eval_calibration/coverage@20%": 0.2708333333333333,
|
||
|
|
"eval_calibration/coverage@25%": 0.3541666666666667,
|
||
|
|
"eval_calibration/coverage@30%": 0.6145833333333334,
|
||
|
|
"eval_calibration/coverage@5%": 0.08854166666666667,
|
||
|
|
"eval_calibration/ece": 0.165546875,
|
||
|
|
"eval_calibration/mean_confidence": 0.7360677083333335,
|
||
|
|
"eval_completions/clipped_ratio": 0.006944444444444438,
|
||
|
|
"eval_completions/max_length": 2543.1666666666665,
|
||
|
|
"eval_completions/max_terminated_length": 2543.1666666666665,
|
||
|
|
"eval_completions/mean_length": 727.5720621744791,
|
||
|
|
"eval_completions/mean_terminated_length": 732.6880798339844,
|
||
|
|
"eval_completions/min_length": 45.833333333333336,
|
||
|
|
"eval_completions/min_terminated_length": 270.3333333333333,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 96487053.0,
|
||
|
|
"eval_reward": 0.968879888455073,
|
||
|
|
"eval_reward_std": 0.2623755360643069,
|
||
|
|
"eval_rewards/accuracy_reward": 0.6397569378217062,
|
||
|
|
"eval_rewards/brier_reward": 0.7657919128735861,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.7643194397290548,
|
||
|
|
"eval_rewards/format_reward": 0.9921875,
|
||
|
|
"eval_rewards/frontier_coverage_0": -0.002180379558315811,
|
||
|
|
"eval_rewards/frontier_coverage_1": -0.002180379558315811,
|
||
|
|
"eval_rewards/frontier_coverage_10": -0.002180379558315811,
|
||
|
|
"eval_rewards/frontier_coverage_15": -0.001989841514538663,
|
||
|
|
"eval_rewards/frontier_coverage_20": -0.0009192682919092476,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.00439577810660315,
|
||
|
|
"eval_rewards/frontier_coverage_5": -0.002180379558315811,
|
||
|
|
"eval_runtime": 204.3039,
|
||
|
|
"eval_samples_per_second": 4.895,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.44677734375,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.47931412359078723,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.223388671875,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.223388671875,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.23765324552853903,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.23765324552853903,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.2600039492050807,
|
||
|
|
"eval_signal/advantage_std": 0.2600039492050807,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21157046655813852,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.26279614369074505,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021157047400871914,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021157047400871914,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10844459633032481,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13759969919919968,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010844459757208824,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010844459757208824,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.015136718284338713,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.044194173999130726,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.7500000298023224,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359142169356,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359142169356,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.10852197060982387,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.16503738115231195,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015518641448579729,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015518641448579729,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.10852197060982387,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.16503738115231195,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015518641448579729,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015518641448579729,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.10852197060982387,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.16503738115231195,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015518641448579729,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015518641448579729,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.10724649329980214,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.16348060965538025,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015336248131158452,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015336248131158452,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.09694457550843556,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.14916668087244034,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001386307393355916,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001386307393355916,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.06621957384049892,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.10746484374006589,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009469399325704823,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009469399325704823,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.10852197060982387,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.16503738115231195,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015518641448579729,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015518641448579729,
|
||
|
|
"eval_steps_per_second": 0.029,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.31649640361405623,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8044629368041323,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.7632590824091535,
|
||
|
|
"calibration/confidence_entropy": 0.527233352329629,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.021354166666666667,
|
||
|
|
"calibration/coverage@15%": 0.029228182414698163,
|
||
|
|
"calibration/coverage@20%": 0.10320702099737533,
|
||
|
|
"calibration/coverage@25%": 0.3614583333333333,
|
||
|
|
"calibration/coverage@30%": 0.40374331550802134,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.13912997885501577,
|
||
|
|
"calibration/mean_confidence": 0.704141535873572,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.006250000000000022,
|
||
|
|
"completions/max_length": 3575.8,
|
||
|
|
"completions/max_terminated_length": 3575.8,
|
||
|
|
"completions/mean_length": 764.2988037109375,
|
||
|
|
"completions/mean_terminated_length": 769.2012573242188,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 205.4,
|
||
|
|
"epoch": 0.13199835002062474,
|
||
|
|
"grad_norm": 0.00045138923451304436,
|
||
|
|
"learning_rate": 4.60843373493976e-06,
|
||
|
|
"loss": -0.0045,
|
||
|
|
"num_tokens": 108372351.0,
|
||
|
|
"reward": 0.9776891589164733,
|
||
|
|
"reward_std": 0.14157166481018066,
|
||
|
|
"rewards/accuracy_reward": 0.6438368082046508,
|
||
|
|
"rewards/brier_reward": 0.7670759439468384,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.8284211039543152,
|
||
|
|
"rewards/format_reward": 0.9937500119209289,
|
||
|
|
"rewards/frontier_coverage_0": -0.008810842200182379,
|
||
|
|
"rewards/frontier_coverage_1": -0.008810842200182379,
|
||
|
|
"rewards/frontier_coverage_10": -0.008810842200182379,
|
||
|
|
"rewards/frontier_coverage_15": -0.007830455573275686,
|
||
|
|
"rewards/frontier_coverage_20": -0.004610662302002311,
|
||
|
|
"rewards/frontier_coverage_25": 0.0019515341526130214,
|
||
|
|
"rewards/frontier_coverage_5": -0.008810842200182379,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18646375834941864,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.24433983564376832,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31666667461395265,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09323187917470932,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09323187917470932,
|
||
|
|
"signal/advantage_abs_mean": 0.10541787147521972,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.10541787147521972,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16827026903629302,
|
||
|
|
"signal/advantage_std": 0.16827026903629302,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13579574525356292,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1776826024055481,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013579574786126614,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013579574786126614,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07823738157749176,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1013486623764038,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007823738548904658,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007823738548904658,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.01110026049427688,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0223752673715353,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.9027777910232544,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00555013024713844,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00555013024713844,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.09882133305072785,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.13713002651929856,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014131450327113271,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014131450327113271,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09882133305072785,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13713002651929856,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014131450327113271,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014131450327113271,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09882133305072785,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13713002651929856,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014131450327113271,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014131450327113271,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09499142318964005,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13223906755447387,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013583773747086526,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013583773747086526,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08681065887212754,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12124353647232056,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012413924559950829,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012413924559950829,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06926739811897278,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09777135252952576,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009905237704515458,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009905237704515458,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09882133305072785,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.13713002651929856,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014131450327113271,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014131450327113271,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3382260111628641,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8498678716551673,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8051509048121346,
|
||
|
|
"calibration/confidence_entropy": 0.5316433536264733,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.042708333333333334,
|
||
|
|
"calibration/coverage@15%": 0.2808253627968338,
|
||
|
|
"calibration/coverage@20%": 0.3194285460613261,
|
||
|
|
"calibration/coverage@25%": 0.36942854606132614,
|
||
|
|
"calibration/coverage@30%": 0.46338289628488666,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.1688721630430574,
|
||
|
|
"calibration/mean_confidence": 0.6809634581476074,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0057291666666666515,
|
||
|
|
"completions/max_length": 3641.0,
|
||
|
|
"completions/max_terminated_length": 3641.0,
|
||
|
|
"completions/mean_length": 771.4474853515625,
|
||
|
|
"completions/mean_terminated_length": 775.8955322265625,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 225.6,
|
||
|
|
"epoch": 0.14399820002249972,
|
||
|
|
"grad_norm": 0.0004424219368956983,
|
||
|
|
"learning_rate": 4.457831325301205e-06,
|
||
|
|
"loss": -0.0032,
|
||
|
|
"num_tokens": 120356002.0,
|
||
|
|
"reward": 0.9673421621322632,
|
||
|
|
"reward_std": 0.14665516316890717,
|
||
|
|
"rewards/accuracy_reward": 0.6087673544883728,
|
||
|
|
"rewards/brier_reward": 0.7633313059806823,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.8849505186080933,
|
||
|
|
"rewards/format_reward": 0.9940104007720947,
|
||
|
|
"rewards/frontier_coverage_0": 0.010127071291208267,
|
||
|
|
"rewards/frontier_coverage_1": 0.010127071291208267,
|
||
|
|
"rewards/frontier_coverage_10": 0.010127071291208267,
|
||
|
|
"rewards/frontier_coverage_15": 0.010443565156310796,
|
||
|
|
"rewards/frontier_coverage_20": 0.012819062476046384,
|
||
|
|
"rewards/frontier_coverage_25": 0.0149055490270257,
|
||
|
|
"rewards/frontier_coverage_5": 0.010127071291208267,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19885525107383728,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2586120396852493,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.28333333134651184,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09942762553691864,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09942762553691864,
|
||
|
|
"signal/advantage_abs_mean": 0.10986414402723313,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.10986414402723313,
|
||
|
|
"signal/advantage_pre_scale_std": 0.17161572575569153,
|
||
|
|
"signal/advantage_std": 0.17161572575569153,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1456875115633011,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.18925343751907348,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01456875205039978,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01456875205039978,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07028612047433853,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0925293281674385,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007028611935675144,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007028611935675144,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.01108398474752903,
|
||
|
|
"signal/format_reward/group_std_mean": 0.025465189665555953,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005541992373764515,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005541992373764515,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.11798207312822342,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1616940289735794,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016871436731889845,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016871436731889845,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11798207312822342,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1616940289735794,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016871436731889845,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016871436731889845,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11798207312822342,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1616940289735794,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016871436731889845,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016871436731889845,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11572056114673615,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15887772738933564,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016548039624467493,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016548039624467493,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10319755375385284,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14280655086040497,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014757250202819705,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014757250202819705,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08172965943813323,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11435115933418274,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011687340680509805,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011687340680509805,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11798207312822342,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1616940289735794,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016871436731889845,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016871436731889845,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.24587685737231602,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8593260113425367,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8419197575160856,
|
||
|
|
"calibration/confidence_entropy": 0.5353930508126851,
|
||
|
|
"calibration/coverage@0%": 0.017225576588337684,
|
||
|
|
"calibration/coverage@1%": 0.017225576588337684,
|
||
|
|
"calibration/coverage@10%": 0.175674499564839,
|
||
|
|
"calibration/coverage@15%": 0.4178592798085291,
|
||
|
|
"calibration/coverage@20%": 0.5347761640557007,
|
||
|
|
"calibration/coverage@25%": 0.64177545691906,
|
||
|
|
"calibration/coverage@30%": 0.7441253263707572,
|
||
|
|
"calibration/coverage@5%": 0.017747769799825935,
|
||
|
|
"calibration/ece": 0.14085569002805973,
|
||
|
|
"calibration/mean_confidence": 0.6692040214761524,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.002777777777777768,
|
||
|
|
"completions/max_length": 3182.6,
|
||
|
|
"completions/max_terminated_length": 3182.6,
|
||
|
|
"completions/mean_length": 761.7659790039063,
|
||
|
|
"completions/mean_terminated_length": 763.8692138671875,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 215.8,
|
||
|
|
"epoch": 0.1559980500243747,
|
||
|
|
"grad_norm": 0.0004698596312664449,
|
||
|
|
"learning_rate": 4.307228915662651e-06,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"num_tokens": 132225594.0,
|
||
|
|
"reward": 0.995530652999878,
|
||
|
|
"reward_std": 0.13234637379646302,
|
||
|
|
"rewards/accuracy_reward": 0.64921875,
|
||
|
|
"rewards/brier_reward": 0.785097849369049,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9373332023620605,
|
||
|
|
"rewards/format_reward": 0.9971354007720947,
|
||
|
|
"rewards/frontier_coverage_0": -0.002034256886690855,
|
||
|
|
"rewards/frontier_coverage_1": -0.002034256886690855,
|
||
|
|
"rewards/frontier_coverage_10": -0.002034256886690855,
|
||
|
|
"rewards/frontier_coverage_15": -0.0016045193187892437,
|
||
|
|
"rewards/frontier_coverage_20": 0.0038384980522096156,
|
||
|
|
"rewards/frontier_coverage_25": 0.013624860998243093,
|
||
|
|
"rewards/frontier_coverage_5": -0.002034256886690855,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18282877504825593,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.24428035020828248,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.29722222685813904,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09141438752412796,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09141438752412796,
|
||
|
|
"signal/advantage_abs_mean": 0.09773297160863877,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09773297160863877,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1567333608865738,
|
||
|
|
"signal/advantage_std": 0.1567333608865738,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13010090589523315,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1674443781375885,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01301009114831686,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01301009114831686,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03651793897151947,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.051064802706241606,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003651794046163559,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003651794046163559,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.005430772574618459,
|
||
|
|
"signal/format_reward/group_std_mean": 0.013679004088044167,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.9305555582046509,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027153862873092295,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0027153862873092295,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.12229467630386352,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.16521598398685455,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017488139681518077,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017488139681518077,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12229467630386352,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16521598398685455,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017488139681518077,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017488139681518077,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12229467630386352,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16521598398685455,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017488139681518077,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017488139681518077,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12053841352462769,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16303691267967224,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017236994579434394,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017236994579434394,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09820731431245804,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13474227339029313,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014043646398931742,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014043646398931742,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07332679852843285,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10180476605892182,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010485732345841825,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010485732345841825,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12229467630386352,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16521598398685455,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017488139681518077,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017488139681518077,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.26958251359481367,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8663688799379698,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8733795984294593,
|
||
|
|
"calibration/confidence_entropy": 0.577260486680282,
|
||
|
|
"calibration/coverage@0%": 0.02308205424394914,
|
||
|
|
"calibration/coverage@1%": 0.02308205424394914,
|
||
|
|
"calibration/coverage@10%": 0.12958876708285477,
|
||
|
|
"calibration/coverage@15%": 0.14900721673999842,
|
||
|
|
"calibration/coverage@20%": 0.2683685738270198,
|
||
|
|
"calibration/coverage@25%": 0.3697268444811656,
|
||
|
|
"calibration/coverage@30%": 0.6402569338436326,
|
||
|
|
"calibration/coverage@5%": 0.0755754925641591,
|
||
|
|
"calibration/ece": 0.09930710020716957,
|
||
|
|
"calibration/mean_confidence": 0.6231611950933489,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.005902777777777768,
|
||
|
|
"completions/max_length": 3517.4,
|
||
|
|
"completions/max_terminated_length": 3517.4,
|
||
|
|
"completions/mean_length": 762.48056640625,
|
||
|
|
"completions/mean_terminated_length": 767.0295288085938,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 196.6,
|
||
|
|
"epoch": 0.16799790002624967,
|
||
|
|
"grad_norm": 0.00045749920536763966,
|
||
|
|
"learning_rate": 4.156626506024097e-06,
|
||
|
|
"loss": -0.0059,
|
||
|
|
"num_tokens": 144087514.0,
|
||
|
|
"reward": 0.9868767857551575,
|
||
|
|
"reward_std": 0.1281371980905533,
|
||
|
|
"rewards/accuracy_reward": 0.637586796283722,
|
||
|
|
"rewards/brier_reward": 0.7736868143081665,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9424182176589966,
|
||
|
|
"rewards/format_reward": 0.9940972208976746,
|
||
|
|
"rewards/frontier_coverage_0": -0.008795747673138976,
|
||
|
|
"rewards/frontier_coverage_1": -0.008795747673138976,
|
||
|
|
"rewards/frontier_coverage_10": -0.008733327453956007,
|
||
|
|
"rewards/frontier_coverage_15": -0.008401900064200163,
|
||
|
|
"rewards/frontier_coverage_20": -0.0042295768857002255,
|
||
|
|
"rewards/frontier_coverage_25": 0.007490093156229704,
|
||
|
|
"rewards/frontier_coverage_5": -0.008795747673138976,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17555881142616273,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2333855837583542,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08777940571308136,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08777940571308136,
|
||
|
|
"signal/advantage_abs_mean": 0.09379418194293976,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09379418194293976,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1555002361536026,
|
||
|
|
"signal/advantage_std": 0.1555002361536026,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12457352876663208,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.160868501663208,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012457353435456753,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012457353435456753,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.032642674446105954,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04912559166550636,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032642676495015623,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032642676495015623,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.010753038246184588,
|
||
|
|
"signal/format_reward/group_std_mean": 0.02312941402196884,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8944444537162781,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005376519123092294,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005376519123092294,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13515576124191284,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17945427298545838,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019327274756506085,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019327274756506085,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13515576124191284,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17945427298545838,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019327274756506085,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019327274756506085,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.134914430975914,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17911535501480103,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019292764598503708,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019292764598503708,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.131193308532238,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1743150144815445,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018760643433779478,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018760643433779478,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10956997573375701,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14680338203907012,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001566850603558123,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001566850603558123,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06231881156563759,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08558403998613358,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000891158974263817,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000891158974263817,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13515576124191284,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17945427298545838,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019327274756506085,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019327274756506085,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2563588085190992,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8382992480473315,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8805381439631234,
|
||
|
|
"calibration/confidence_entropy": 0.5695789505685956,
|
||
|
|
"calibration/coverage@0%": 0.02257520030390938,
|
||
|
|
"calibration/coverage@1%": 0.02257520030390938,
|
||
|
|
"calibration/coverage@10%": 0.1989478432794585,
|
||
|
|
"calibration/coverage@15%": 0.27763706140350874,
|
||
|
|
"calibration/coverage@20%": 0.41623903508771926,
|
||
|
|
"calibration/coverage@25%": 0.586359649122807,
|
||
|
|
"calibration/coverage@30%": 0.6760910087719298,
|
||
|
|
"calibration/coverage@5%": 0.027838358198646225,
|
||
|
|
"calibration/ece": 0.10921236148819072,
|
||
|
|
"calibration/mean_confidence": 0.6479703220803527,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.002951388888888884,
|
||
|
|
"completions/max_length": 3187.4,
|
||
|
|
"completions/max_terminated_length": 3187.4,
|
||
|
|
"completions/mean_length": 755.7357788085938,
|
||
|
|
"completions/mean_terminated_length": 757.99462890625,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 218.8,
|
||
|
|
"epoch": 0.17999775002812465,
|
||
|
|
"grad_norm": 0.0004290560900699347,
|
||
|
|
"learning_rate": 4.006024096385543e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"num_tokens": 155858486.0,
|
||
|
|
"reward": 1.017405092716217,
|
||
|
|
"reward_std": 0.11944967806339264,
|
||
|
|
"rewards/accuracy_reward": 0.6934895753860474,
|
||
|
|
"rewards/brier_reward": 0.7949100136756897,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9398416519165039,
|
||
|
|
"rewards/format_reward": 0.9970486164093018,
|
||
|
|
"rewards/frontier_coverage_0": -0.023628878220915795,
|
||
|
|
"rewards/frontier_coverage_1": -0.023628878220915795,
|
||
|
|
"rewards/frontier_coverage_10": -0.021788668585941195,
|
||
|
|
"rewards/frontier_coverage_15": -0.014927842747420072,
|
||
|
|
"rewards/frontier_coverage_20": -0.003723863745108247,
|
||
|
|
"rewards/frontier_coverage_25": 0.017198705207556488,
|
||
|
|
"rewards/frontier_coverage_5": -0.023147269897162915,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16501193642616271,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.21951915323734283,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444537162781,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08250596821308136,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08250596821308136,
|
||
|
|
"signal/advantage_abs_mean": 0.08793365359306335,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.08793365359306335,
|
||
|
|
"signal/advantage_pre_scale_std": 0.14816934764385223,
|
||
|
|
"signal/advantage_std": 0.14816934764385223,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.11173765361309052,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.14379720985889435,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011173765547573567,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011173765547573567,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030797071009874343,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0448210634291172,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030797069426625966,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030797069426625966,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.005631510401144624,
|
||
|
|
"signal/format_reward/group_std_mean": 0.01455035675317049,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.925,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002815755200572312,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.002815755200572312,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.11744404733180999,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.15851396322250366,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001679449831135571,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001679449831135571,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11744404733180999,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15851396322250366,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001679449831135571,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001679449831135571,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11422204971313477,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15434344708919526,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016333752777427436,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016333752777427436,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10063839107751846,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13689128160476685,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014391290256753563,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014391290256753563,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07208155021071434,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0996133729815483,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001030766183976084,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001030766183976084,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.043751812726259234,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06023159921169281,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006256509223021567,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006256509223021567,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11698432117700577,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15789782702922822,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016728756949305535,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016728756949305535,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.21554772857493187,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7793047369317647,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.859575274807294,
|
||
|
|
"calibration/confidence_entropy": 0.5226812675460125,
|
||
|
|
"calibration/coverage@0%": 0.012215909090909092,
|
||
|
|
"calibration/coverage@1%": 0.012215909090909092,
|
||
|
|
"calibration/coverage@10%": 0.043605169340463455,
|
||
|
|
"calibration/coverage@15%": 0.41333556149732625,
|
||
|
|
"calibration/coverage@20%": 0.47428141711229943,
|
||
|
|
"calibration/coverage@25%": 0.7336229946524064,
|
||
|
|
"calibration/coverage@30%": 0.7927083333333333,
|
||
|
|
"calibration/coverage@5%": 0.01756350267379679,
|
||
|
|
"calibration/ece": 0.11169882524136499,
|
||
|
|
"calibration/mean_confidence": 0.7130084327986905,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.005642361111111116,
|
||
|
|
"completions/max_length": 3469.4,
|
||
|
|
"completions/max_terminated_length": 3469.4,
|
||
|
|
"completions/mean_length": 808.5981689453125,
|
||
|
|
"completions/mean_terminated_length": 813.23388671875,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 204.4,
|
||
|
|
"epoch": 0.19199760002999963,
|
||
|
|
"grad_norm": 0.0005567002226598561,
|
||
|
|
"learning_rate": 3.855421686746989e-06,
|
||
|
|
"loss": -0.0042,
|
||
|
|
"num_tokens": 168226817.0,
|
||
|
|
"reward": 0.9941539883613586,
|
||
|
|
"reward_std": 0.13035476952791214,
|
||
|
|
"rewards/accuracy_reward": 0.6485243082046509,
|
||
|
|
"rewards/brier_reward": 0.7861994743347168,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9314009189605713,
|
||
|
|
"rewards/format_reward": 0.9942708492279053,
|
||
|
|
"rewards/frontier_coverage_0": 0.005442132381722331,
|
||
|
|
"rewards/frontier_coverage_1": 0.005442132381722331,
|
||
|
|
"rewards/frontier_coverage_10": 0.006071169814094901,
|
||
|
|
"rewards/frontier_coverage_15": 0.007214262872003019,
|
||
|
|
"rewards/frontier_coverage_20": 0.011351470567751676,
|
||
|
|
"rewards/frontier_coverage_25": 0.02855553664267063,
|
||
|
|
"rewards/frontier_coverage_5": 0.005597225157544017,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17549370527267455,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.23448271155357361,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.32222222089767455,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08774685263633727,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08774685263633727,
|
||
|
|
"signal/advantage_abs_mean": 0.09637740403413772,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09637740403413772,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16058520078659058,
|
||
|
|
"signal/advantage_std": 0.16058520078659058,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12150197178125381,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.15757565200328827,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01215019728988409,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01215019728988409,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038753630965948103,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05312336310744285,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003875363012775779,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003875363012775779,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00966796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.018474388308823107,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.9222222328186035,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004833984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004833984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.10300857871770859,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.14024612605571746,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014730226481333374,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014730226481333374,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10300857871770859,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.14024612605571746,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014730226481333374,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014730226481333374,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0971254363656044,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.132836189866066,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001388893718831241,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001388893718831241,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08325443416833878,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11486416459083557,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011905384133569896,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011905384133569896,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.056342567503452304,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07883516997098923,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008056987193413079,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008056987193413079,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04240647032856941,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.05655211955308914,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006064124754630029,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006064124754630029,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10220045298337936,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1392603486776352,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014614664250984788,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014614664250984788,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.22017950153274862,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8714044348934851,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8522211124256589,
|
||
|
|
"calibration/confidence_entropy": 0.5380838558752095,
|
||
|
|
"calibration/coverage@0%": 0.036259588016304314,
|
||
|
|
"calibration/coverage@1%": 0.036259588016304314,
|
||
|
|
"calibration/coverage@10%": 0.11721307560118954,
|
||
|
|
"calibration/coverage@15%": 0.3834662270669858,
|
||
|
|
"calibration/coverage@20%": 0.4646737963842374,
|
||
|
|
"calibration/coverage@25%": 0.6478996052484998,
|
||
|
|
"calibration/coverage@30%": 0.8137590066730672,
|
||
|
|
"calibration/coverage@5%": 0.059398362336180996,
|
||
|
|
"calibration/ece": 0.11638557739382216,
|
||
|
|
"calibration/mean_confidence": 0.6332810296217062,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.004427083333333348,
|
||
|
|
"completions/max_length": 3764.4,
|
||
|
|
"completions/max_terminated_length": 3764.4,
|
||
|
|
"completions/mean_length": 794.3666748046875,
|
||
|
|
"completions/mean_terminated_length": 797.9120361328125,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 225.6,
|
||
|
|
"epoch": 0.2039974500318746,
|
||
|
|
"grad_norm": 0.00044087867718189955,
|
||
|
|
"learning_rate": 3.7048192771084342e-06,
|
||
|
|
"loss": -0.0022,
|
||
|
|
"num_tokens": 180465121.0,
|
||
|
|
"reward": 1.0135318279266357,
|
||
|
|
"reward_std": 0.13029464483261108,
|
||
|
|
"rewards/accuracy_reward": 0.6832465171813965,
|
||
|
|
"rewards/brier_reward": 0.8005435109138489,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9350399494171142,
|
||
|
|
"rewards/format_reward": 0.9953993082046508,
|
||
|
|
"rewards/frontier_coverage_0": -0.0016260695294477046,
|
||
|
|
"rewards/frontier_coverage_1": -0.0016260695294477046,
|
||
|
|
"rewards/frontier_coverage_10": -0.0008423494873568416,
|
||
|
|
"rewards/frontier_coverage_15": 0.00238975181709975,
|
||
|
|
"rewards/frontier_coverage_20": 0.011206477042287588,
|
||
|
|
"rewards/frontier_coverage_25": 0.0376150730997324,
|
||
|
|
"rewards/frontier_coverage_5": -0.0016260695294477046,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17951931357383727,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.23702281415462495,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08975965678691863,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08975965678691863,
|
||
|
|
"signal/advantage_abs_mean": 0.09476174563169479,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09476174563169479,
|
||
|
|
"signal/advantage_pre_scale_std": 0.15883181095123292,
|
||
|
|
"signal/advantage_std": 0.15883181095123292,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.11919141858816147,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1569477528333664,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011919141374528408,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011919141374528408,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034977962449193004,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.052396781742572784,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034977963194251062,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034977963194251062,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.008707682136446238,
|
||
|
|
"signal/format_reward/group_std_mean": 0.02160101868212223,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004353841068223119,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004353841068223119,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.11353013217449189,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.15329338610172272,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016234809532761573,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016234809532761573,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11353013217449189,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15329338610172272,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016234809532761573,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016234809532761573,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11054201275110245,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14934370666742325,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015807508490979672,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015807508490979672,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09602530598640442,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13076421320438386,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013731618179008364,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013731618179008364,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.058435800671577456,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08124178051948547,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008356319856829941,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008356319856829941,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.045007632672786714,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.05981680378317833,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006436091498471797,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006436091498471797,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11353013217449189,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15329338610172272,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016234809532761573,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016234809532761573,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.19130533906993302,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8412225489716348,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8626075627761844,
|
||
|
|
"calibration/confidence_entropy": 0.5053431016428965,
|
||
|
|
"calibration/coverage@0%": 0.03859342792359489,
|
||
|
|
"calibration/coverage@1%": 0.03859342792359489,
|
||
|
|
"calibration/coverage@10%": 0.25211929092498037,
|
||
|
|
"calibration/coverage@15%": 0.3443653155241082,
|
||
|
|
"calibration/coverage@20%": 0.4048542125930009,
|
||
|
|
"calibration/coverage@25%": 0.831518123679618,
|
||
|
|
"calibration/coverage@30%": 0.9070597960870763,
|
||
|
|
"calibration/coverage@5%": 0.09598164593926069,
|
||
|
|
"calibration/ece": 0.0995040290382317,
|
||
|
|
"calibration/mean_confidence": 0.6870986648020756,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.006163194444444442,
|
||
|
|
"completions/max_length": 3540.4,
|
||
|
|
"completions/max_terminated_length": 3540.4,
|
||
|
|
"completions/mean_length": 758.8940307617188,
|
||
|
|
"completions/mean_terminated_length": 763.611083984375,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 206.2,
|
||
|
|
"epoch": 0.2159973000337496,
|
||
|
|
"grad_norm": 0.0005134688108228147,
|
||
|
|
"learning_rate": 3.5542168674698798e-06,
|
||
|
|
"loss": -0.0033,
|
||
|
|
"num_tokens": 192276252.0,
|
||
|
|
"reward": 1.0077428221702576,
|
||
|
|
"reward_std": 0.12586894929409026,
|
||
|
|
"rewards/accuracy_reward": 0.6733506917953491,
|
||
|
|
"rewards/brier_reward": 0.7962008833885192,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9313777089118958,
|
||
|
|
"rewards/format_reward": 0.9938367962837219,
|
||
|
|
"rewards/frontier_coverage_0": 0.004424169240519404,
|
||
|
|
"rewards/frontier_coverage_1": 0.004424169240519404,
|
||
|
|
"rewards/frontier_coverage_10": 0.005338566357386299,
|
||
|
|
"rewards/frontier_coverage_15": 0.009187003783881664,
|
||
|
|
"rewards/frontier_coverage_20": 0.01867530047893524,
|
||
|
|
"rewards/frontier_coverage_25": 0.05060422196984291,
|
||
|
|
"rewards/frontier_coverage_5": 0.004631689615052892,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16591254472732545,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.21845885515213012,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08295627236366272,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08295627236366272,
|
||
|
|
"signal/advantage_abs_mean": 0.09228640496730804,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09228640496730804,
|
||
|
|
"signal/advantage_pre_scale_std": 0.15987550914287568,
|
||
|
|
"signal/advantage_std": 0.15987550914287568,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.117860808968544,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1544253945350647,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011786081641912461,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011786081641912461,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03850234746932983,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.056425672769546506,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038502346724271774,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038502346724271774,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.011311848741024732,
|
||
|
|
"signal/format_reward/group_std_mean": 0.02502160966396332,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005655924370512366,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005655924370512366,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.09827356785535812,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.13401113748550414,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001405311981216073,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001405311981216073,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09827356785535812,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13401113748550414,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001405311981216073,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001405311981216073,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09452640563249588,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.12922739684581758,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013517276151105762,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013517276151105762,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07233314439654351,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10092607736587525,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010343640227802099,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010343640227802099,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.049670548737049104,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06895174235105514,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007102888310328126,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007102888310328126,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05155856236815452,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.067772376537323,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007372874300926924,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007372874300926924,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09758316129446029,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.13312698602676393,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013954391703009605,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013954391703009605,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.23218282614032365,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8237960782553888,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8804108398849207,
|
||
|
|
"calibration/confidence_entropy": 0.5243356933254755,
|
||
|
|
"calibration/coverage@0%": 0.01052649005750661,
|
||
|
|
"calibration/coverage@1%": 0.01052649005750661,
|
||
|
|
"calibration/coverage@10%": 0.09966577080811616,
|
||
|
|
"calibration/coverage@15%": 0.431924512069393,
|
||
|
|
"calibration/coverage@20%": 0.617088188355147,
|
||
|
|
"calibration/coverage@25%": 0.6702744892179752,
|
||
|
|
"calibration/coverage@30%": 0.710721242177679,
|
||
|
|
"calibration/coverage@5%": 0.03245860494001314,
|
||
|
|
"calibration/ece": 0.1193284582303358,
|
||
|
|
"calibration/mean_confidence": 0.6877564386884953,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.006163194444444442,
|
||
|
|
"completions/max_length": 3526.8,
|
||
|
|
"completions/max_terminated_length": 3526.8,
|
||
|
|
"completions/mean_length": 766.1975708007812,
|
||
|
|
"completions/mean_terminated_length": 770.9320678710938,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 220.8,
|
||
|
|
"epoch": 0.22799715003562457,
|
||
|
|
"grad_norm": 0.0004157455696258694,
|
||
|
|
"learning_rate": 3.4036144578313257e-06,
|
||
|
|
"loss": -0.004,
|
||
|
|
"num_tokens": 204194528.0,
|
||
|
|
"reward": 1.0064563512802125,
|
||
|
|
"reward_std": 0.12273335456848145,
|
||
|
|
"rewards/accuracy_reward": 0.6696180582046509,
|
||
|
|
"rewards/brier_reward": 0.7975351452827454,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9334475994110107,
|
||
|
|
"rewards/format_reward": 0.9934895753860473,
|
||
|
|
"rewards/frontier_coverage_0": 0.007734180334955454,
|
||
|
|
"rewards/frontier_coverage_1": 0.007734180334955454,
|
||
|
|
"rewards/frontier_coverage_10": 0.008875045739114285,
|
||
|
|
"rewards/frontier_coverage_15": 0.013548683421686292,
|
||
|
|
"rewards/frontier_coverage_20": 0.02269660122692585,
|
||
|
|
"rewards/frontier_coverage_25": 0.05776782408356666,
|
||
|
|
"rewards/frontier_coverage_5": 0.007814234215766191,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1544704854488373,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2076838880777359,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07723524272441865,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07723524272441865,
|
||
|
|
"signal/advantage_abs_mean": 0.08850989192724228,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.08850989192724228,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1567450851202011,
|
||
|
|
"signal/advantage_std": 0.1567450851202011,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.11523358970880508,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.15127619802951814,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011523359268903733,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011523359268903733,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036900246143341066,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05565410703420639,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036900244653224946,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036900244653224946,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.01131184899713844,
|
||
|
|
"signal/format_reward/group_std_mean": 0.026015446335077286,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00565592449856922,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00565592449856922,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.09024225771427155,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.12343428432941436,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012904643081128597,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012904643081128597,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09024225771427155,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.12343428432941436,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012904643081128597,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012904643081128597,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08697677999734879,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1192005679011345,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012437679572030902,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012437679572030902,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06753548979759216,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09412883222103119,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009657575283199549,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009657575283199549,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.047610755264759066,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06612022668123245,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006808338337577879,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006808338337577879,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05751822665333748,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07561186105012893,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008225106517784298,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008225106517784298,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08994513750076294,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.12307111024856568,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012862155679613351,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012862155679613351,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.20759978881743368,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7872453416290367,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.851199244324374,
|
||
|
|
"calibration/confidence_entropy": 0.5093936144746041,
|
||
|
|
"calibration/coverage@0%": 0.012143117253085606,
|
||
|
|
"calibration/coverage@1%": 0.012143117253085606,
|
||
|
|
"calibration/coverage@10%": 0.10598727560385317,
|
||
|
|
"calibration/coverage@15%": 0.2561893629834925,
|
||
|
|
"calibration/coverage@20%": 0.49089280471040475,
|
||
|
|
"calibration/coverage@25%": 0.6995251651659927,
|
||
|
|
"calibration/coverage@30%": 0.8823003139129817,
|
||
|
|
"calibration/coverage@5%": 0.031241260489159883,
|
||
|
|
"calibration/ece": 0.0877942040353775,
|
||
|
|
"calibration/mean_confidence": 0.7242433693125331,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.01328125,
|
||
|
|
"completions/max_length": 3636.2,
|
||
|
|
"completions/max_terminated_length": 3636.2,
|
||
|
|
"completions/mean_length": 782.308251953125,
|
||
|
|
"completions/mean_terminated_length": 792.8037231445312,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 226.6,
|
||
|
|
"epoch": 0.23999700003749952,
|
||
|
|
"grad_norm": 0.00045198958832770586,
|
||
|
|
"learning_rate": 3.2530120481927713e-06,
|
||
|
|
"loss": -0.0101,
|
||
|
|
"num_tokens": 216305791.0,
|
||
|
|
"reward": 1.0077686071395875,
|
||
|
|
"reward_std": 0.13463030606508256,
|
||
|
|
"rewards/accuracy_reward": 0.6796875,
|
||
|
|
"rewards/brier_reward": 0.7993229150772094,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9266997456550599,
|
||
|
|
"rewards/format_reward": 0.9866319417953491,
|
||
|
|
"rewards/frontier_coverage_0": 0.009804848302155732,
|
||
|
|
"rewards/frontier_coverage_1": 0.009804848302155732,
|
||
|
|
"rewards/frontier_coverage_10": 0.01042446969076991,
|
||
|
|
"rewards/frontier_coverage_15": 0.012922577001154423,
|
||
|
|
"rewards/frontier_coverage_20": 0.02126994784921408,
|
||
|
|
"rewards/frontier_coverage_25": 0.06628896966576577,
|
||
|
|
"rewards/frontier_coverage_5": 0.009804848302155732,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16714409589767457,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.21892527341842652,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08357204794883728,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08357204794883728,
|
||
|
|
"signal/advantage_abs_mean": 0.09943573027849198,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09943573027849198,
|
||
|
|
"signal/advantage_pre_scale_std": 0.17322509586811066,
|
||
|
|
"signal/advantage_std": 0.17322509586811066,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12519195675849915,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1623040735721588,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012519196048378945,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012519196048378945,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04277070388197899,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06450802609324455,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004277070425450802,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004277070425450802,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02023654468357563,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03803690262138844,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010118272341787814,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010118272341787814,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.08244038820266723,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.11372108608484269,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0011788975214585661,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011788975214585661,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08244038820266723,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11372108608484269,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0011788975214585661,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011788975214585661,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07991492450237274,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11050502359867095,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0011427834630012511,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011427834630012511,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0665148988366127,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09321689903736115,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009511630749329924,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009511630749329924,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04453737959265709,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06285597681999207,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006368845235556364,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006368845235556364,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.060834895074367526,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08100210577249527,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008699389640241861,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008699389640241861,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08244038820266723,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11372108608484269,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0011788975214585661,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011788975214585661,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23999700003749952,
|
||
|
|
"eval_calibration/aurc": 0.13921226805258638,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.7191935689276043,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.8323235519691029,
|
||
|
|
"eval_calibration/confidence_entropy": 0.47122875449565105,
|
||
|
|
"eval_calibration/coverage@0%": 0.16717069892473116,
|
||
|
|
"eval_calibration/coverage@1%": 0.16717069892473116,
|
||
|
|
"eval_calibration/coverage@10%": 0.46639784946236557,
|
||
|
|
"eval_calibration/coverage@15%": 0.6330645161290323,
|
||
|
|
"eval_calibration/coverage@20%": 0.837869623655914,
|
||
|
|
"eval_calibration/coverage@25%": 0.9321236559139785,
|
||
|
|
"eval_calibration/coverage@30%": 0.9635416666666666,
|
||
|
|
"eval_calibration/coverage@5%": 0.24529569892473116,
|
||
|
|
"eval_calibration/ece": 0.15433622692672344,
|
||
|
|
"eval_calibration/mean_confidence": 0.7643868913528463,
|
||
|
|
"eval_completions/clipped_ratio": 0.014756944444444456,
|
||
|
|
"eval_completions/max_length": 2571.1666666666665,
|
||
|
|
"eval_completions/max_terminated_length": 2571.1666666666665,
|
||
|
|
"eval_completions/mean_length": 752.1073099772135,
|
||
|
|
"eval_completions/mean_terminated_length": 763.3598937988281,
|
||
|
|
"eval_completions/min_length": 104.16666666666667,
|
||
|
|
"eval_completions/min_terminated_length": 271.8333333333333,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 216305791.0,
|
||
|
|
"eval_reward": 0.9933919807275137,
|
||
|
|
"eval_reward_std": 0.27725009868542355,
|
||
|
|
"eval_rewards/accuracy_reward": 0.6710069477558136,
|
||
|
|
"eval_rewards/brier_reward": 0.7916092475255331,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8643936216831207,
|
||
|
|
"eval_rewards/format_reward": 0.980902781089147,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.011102605417060355,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.011102605417060355,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.011642855126410723,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.012925609441784522,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.017780127624670666,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.05278685626884302,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.011105729693857333,
|
||
|
|
"eval_runtime": 206.9157,
|
||
|
|
"eval_samples_per_second": 4.833,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4260525157054265,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4678276677926381,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21302625785271326,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21302625785271326,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.2413168102502823,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2413168102502823,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.2765214368700981,
|
||
|
|
"eval_signal/advantage_std": 0.2765214368700981,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2165469080209732,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.26921579490105313,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021654691236714523,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021654691236714523,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06999108629922073,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11411779932677746,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006999108707532287,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006999108707532287,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.03613281218955914,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.08657800406217575,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.583333338300387,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.01806640609477957,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.01806640609477957,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.08888960257172585,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.14864219104250273,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0012711213203147054,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012711213203147054,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.08888960257172585,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.14864219104250273,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012711213203147054,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012711213203147054,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.08579947799444199,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.14465376734733582,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012269325282735128,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012269325282735128,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.06808544136583805,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.12035164733727773,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009736218586719284,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009736218586719284,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.04719839679698149,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.08246325453122456,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006749370562223097,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006749370562223097,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09537930289904277,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.12517398471633592,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001363924064207822,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001363924064207822,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.08822002758582433,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.1477730112771193,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012615464123276372,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012615464123276372,
|
||
|
|
"eval_steps_per_second": 0.029,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3479992140027637,
|
||
|
|
"calibration/batch_distribution_entropy": 0.750787770254411,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8211954941825012,
|
||
|
|
"calibration/confidence_entropy": 0.46615226530585224,
|
||
|
|
"calibration/coverage@0%": 0.019922428534220153,
|
||
|
|
"calibration/coverage@1%": 0.019922428534220153,
|
||
|
|
"calibration/coverage@10%": 0.12016627849477383,
|
||
|
|
"calibration/coverage@15%": 0.1617470000037748,
|
||
|
|
"calibration/coverage@20%": 0.19590033859661704,
|
||
|
|
"calibration/coverage@25%": 0.21370138571703592,
|
||
|
|
"calibration/coverage@30%": 0.3333657334183914,
|
||
|
|
"calibration/coverage@5%": 0.09898002015725679,
|
||
|
|
"calibration/ece": 0.17690751887463851,
|
||
|
|
"calibration/mean_confidence": 0.7663661401788909,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.020659722222222232,
|
||
|
|
"completions/max_length": 3665.8,
|
||
|
|
"completions/max_terminated_length": 3665.8,
|
||
|
|
"completions/mean_length": 753.8394897460937,
|
||
|
|
"completions/mean_terminated_length": 769.899658203125,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 205.6,
|
||
|
|
"epoch": 0.2519968500393745,
|
||
|
|
"grad_norm": 0.00047751839156262577,
|
||
|
|
"learning_rate": 3.1024096385542172e-06,
|
||
|
|
"loss": -0.017,
|
||
|
|
"num_tokens": 228066886.0,
|
||
|
|
"reward": 0.9950996160507202,
|
||
|
|
"reward_std": 0.14413480460643768,
|
||
|
|
"rewards/accuracy_reward": 0.6709201335906982,
|
||
|
|
"rewards/brier_reward": 0.774049949645996,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9144436717033386,
|
||
|
|
"rewards/format_reward": 0.9793402791023255,
|
||
|
|
"rewards/frontier_coverage_0": 0.0029952601238619537,
|
||
|
|
"rewards/frontier_coverage_1": 0.0029952601238619537,
|
||
|
|
"rewards/frontier_coverage_10": 0.002985831905971281,
|
||
|
|
"rewards/frontier_coverage_15": 0.005100842425599694,
|
||
|
|
"rewards/frontier_coverage_20": 0.011829984840005636,
|
||
|
|
"rewards/frontier_coverage_25": 0.04955209493637085,
|
||
|
|
"rewards/frontier_coverage_5": 0.002863927249563858,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16414388120174409,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2145601123571396,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.402777773141861,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08207194060087204,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08207194060087204,
|
||
|
|
"signal/advantage_abs_mean": 0.10552676767110825,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.10552676767110825,
|
||
|
|
"signal/advantage_pre_scale_std": 0.18652166426181793,
|
||
|
|
"signal/advantage_std": 0.18652166426181793,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13289882838726044,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17235172688961028,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01328988280147314,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01328988280147314,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05428531989455223,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0814499482512474,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00542853195220232,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00542853195220232,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.03229166679084301,
|
||
|
|
"signal/format_reward/group_std_mean": 0.056061620265245436,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016145833395421506,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016145833395421506,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.06614647805690765,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.09260518848896027,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009458946529775858,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009458946529775858,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.06614647805690765,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.09260518848896027,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009458946529775858,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009458946529775858,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.06400079652667046,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.089921535551548,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009152113692834973,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009152113692834973,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.05834746509790421,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08256930112838745,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00083436876302585,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00083436876302585,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03862107619643211,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.055405861139297484,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005522813764400781,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005522813764400781,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04953863024711609,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06496639400720597,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007084024371579289,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007084024371579289,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.06591839194297791,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.09233485758304597,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009426330449059606,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009426330449059606,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.24207440577624634,
|
||
|
|
"calibration/batch_distribution_entropy": 0.6875765593814578,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.7860359646652466,
|
||
|
|
"calibration/confidence_entropy": 0.4102154748788104,
|
||
|
|
"calibration/coverage@0%": 0.022595843714871777,
|
||
|
|
"calibration/coverage@1%": 0.022595843714871777,
|
||
|
|
"calibration/coverage@10%": 0.2102467277237638,
|
||
|
|
"calibration/coverage@15%": 0.2534104615446351,
|
||
|
|
"calibration/coverage@20%": 0.3359725625062341,
|
||
|
|
"calibration/coverage@25%": 0.5277390933544955,
|
||
|
|
"calibration/coverage@30%": 0.5790548647469459,
|
||
|
|
"calibration/coverage@5%": 0.11875024921629493,
|
||
|
|
"calibration/ece": 0.15191207953492325,
|
||
|
|
"calibration/mean_confidence": 0.800740167621276,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.018663194444444465,
|
||
|
|
"completions/max_length": 3719.2,
|
||
|
|
"completions/max_terminated_length": 3719.2,
|
||
|
|
"completions/mean_length": 743.8192993164063,
|
||
|
|
"completions/mean_terminated_length": 758.0759155273438,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 211.2,
|
||
|
|
"epoch": 0.2639967000412495,
|
||
|
|
"grad_norm": 0.0006374148651957512,
|
||
|
|
"learning_rate": 2.9518072289156627e-06,
|
||
|
|
"loss": -0.0149,
|
||
|
|
"num_tokens": 239744132.0,
|
||
|
|
"reward": 1.0158015012741088,
|
||
|
|
"reward_std": 0.13897253274917604,
|
||
|
|
"rewards/accuracy_reward": 0.7074652791023255,
|
||
|
|
"rewards/brier_reward": 0.7943165302276611,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9072112798690796,
|
||
|
|
"rewards/format_reward": 0.98125,
|
||
|
|
"rewards/frontier_coverage_0": 0.002520253928378224,
|
||
|
|
"rewards/frontier_coverage_1": 0.002520253928378224,
|
||
|
|
"rewards/frontier_coverage_10": 0.002841651951894164,
|
||
|
|
"rewards/frontier_coverage_15": 0.0039699568413198,
|
||
|
|
"rewards/frontier_coverage_20": 0.013109351228922605,
|
||
|
|
"rewards/frontier_coverage_25": 0.0626706637442112,
|
||
|
|
"rewards/frontier_coverage_5": 0.0026505836751312016,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1546115458011627,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.21346699297428132,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3583333343267441,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07730577290058135,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07730577290058135,
|
||
|
|
"signal/advantage_abs_mean": 0.09841311872005462,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09841311872005462,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1800345003604889,
|
||
|
|
"signal/advantage_std": 0.1800345003604889,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12525416761636735,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16696035861968994,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012525417283177376,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012525417283177376,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0548114612698555,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08105210959911346,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005481146275997162,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005481146275997162,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02778862789273262,
|
||
|
|
"signal/format_reward/group_std_mean": 0.04899119287729263,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8055555820465088,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01389431394636631,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01389431394636631,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.056989597529172896,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.08076736629009247,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0008149512344971299,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008149512344971299,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.056989597529172896,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.08076736629009247,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0008149512344971299,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008149512344971299,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.05564908087253571,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07900142818689346,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007957818452268839,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007957818452268839,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0526436798274517,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0750760056078434,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007528046262450516,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007528046262450516,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.037152212113142014,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05306043922901153,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005312766588758677,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005312766588758677,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04958587661385536,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06510179191827774,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000709078018553555,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000709078018553555,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05671231150627136,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.08039210587739945,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008109860471449792,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008109860471449792,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.33356083398146347,
|
||
|
|
"calibration/batch_distribution_entropy": 0.6959018763618702,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.7522820617818654,
|
||
|
|
"calibration/confidence_entropy": 0.4518816489007994,
|
||
|
|
"calibration/coverage@0%": 0.0032267264856438273,
|
||
|
|
"calibration/coverage@1%": 0.0032267264856438273,
|
||
|
|
"calibration/coverage@10%": 0.044937956432167886,
|
||
|
|
"calibration/coverage@15%": 0.08441266677084883,
|
||
|
|
"calibration/coverage@20%": 0.15283620421861482,
|
||
|
|
"calibration/coverage@25%": 0.3622388517951728,
|
||
|
|
"calibration/coverage@30%": 0.5879621018367821,
|
||
|
|
"calibration/coverage@5%": 0.0032267264856438273,
|
||
|
|
"calibration/ece": 0.20717975780837206,
|
||
|
|
"calibration/mean_confidence": 0.7802978326710153,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.02144097222222221,
|
||
|
|
"completions/max_length": 3724.6,
|
||
|
|
"completions/max_terminated_length": 3724.6,
|
||
|
|
"completions/mean_length": 718.7766479492187,
|
||
|
|
"completions/mean_terminated_length": 734.714306640625,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 199.2,
|
||
|
|
"epoch": 0.27599655004312446,
|
||
|
|
"grad_norm": 0.0006048035575076938,
|
||
|
|
"learning_rate": 2.8012048192771087e-06,
|
||
|
|
"loss": -0.0178,
|
||
|
|
"num_tokens": 251103639.0,
|
||
|
|
"reward": 0.9875237822532654,
|
||
|
|
"reward_std": 0.1449933499097824,
|
||
|
|
"rewards/accuracy_reward": 0.6601562619209289,
|
||
|
|
"rewards/brier_reward": 0.7643703937530517,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9071934223175049,
|
||
|
|
"rewards/format_reward": 0.9785590291023254,
|
||
|
|
"rewards/frontier_coverage_0": 0.004726332519203425,
|
||
|
|
"rewards/frontier_coverage_1": 0.004726332519203425,
|
||
|
|
"rewards/frontier_coverage_10": 0.005253351200371981,
|
||
|
|
"rewards/frontier_coverage_15": 0.005590797681361437,
|
||
|
|
"rewards/frontier_coverage_20": 0.008738029189407826,
|
||
|
|
"rewards/frontier_coverage_25": 0.03678738847374916,
|
||
|
|
"rewards/frontier_coverage_5": 0.004789900593459606,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1631022125482559,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2127610206604004,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08155110627412795,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08155110627412795,
|
||
|
|
"signal/advantage_abs_mean": 0.10652477592229843,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.10652477592229843,
|
||
|
|
"signal/advantage_pre_scale_std": 0.18951753973960878,
|
||
|
|
"signal/advantage_std": 0.18951753973960878,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13075682073831557,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17059859931468963,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013075682520866393,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013075682520866393,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05514480024576187,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08304563462734223,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005514480173587799,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005514480173587799,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.03312174491584301,
|
||
|
|
"signal/format_reward/group_std_mean": 0.05764241740107536,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.7777777910232544,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016560872457921504,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016560872457921504,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.051223869621753695,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.07264740690588951,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0007325013517402113,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0007325013517402113,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.051223869621753695,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07264740690588951,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007325013517402113,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007325013517402113,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.04941959977149964,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07031489610671997,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007067002821713686,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007067002821713686,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04780538156628609,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06820949018001557,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0006836169632151723,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0006836169632151723,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03383687846362591,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0490049920976162,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004838673456106335,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004838673456106335,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04597809240221977,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06020479202270508,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006574866769369691,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006574866769369691,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05090032443404198,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07223524823784828,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007278746110387146,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007278746110387146,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.31065150593398905,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7314189320033493,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.7229353609548825,
|
||
|
|
"calibration/confidence_entropy": 0.4617376720170327,
|
||
|
|
"calibration/coverage@0%": 0.018317815283684445,
|
||
|
|
"calibration/coverage@1%": 0.018317815283684445,
|
||
|
|
"calibration/coverage@10%": 0.0586319514093389,
|
||
|
|
"calibration/coverage@15%": 0.1323828570454714,
|
||
|
|
"calibration/coverage@20%": 0.20926282095672583,
|
||
|
|
"calibration/coverage@25%": 0.5352344590321512,
|
||
|
|
"calibration/coverage@30%": 0.5989735572236128,
|
||
|
|
"calibration/coverage@5%": 0.022506296959077115,
|
||
|
|
"calibration/ece": 0.1755218508227229,
|
||
|
|
"calibration/mean_confidence": 0.7753019329613121,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.015451388888888862,
|
||
|
|
"completions/max_length": 3661.4,
|
||
|
|
"completions/max_terminated_length": 3661.4,
|
||
|
|
"completions/mean_length": 682.734814453125,
|
||
|
|
"completions/mean_terminated_length": 693.4420043945313,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 205.6,
|
||
|
|
"epoch": 0.28799640004499943,
|
||
|
|
"grad_norm": 0.0003586419625207782,
|
||
|
|
"learning_rate": 2.6506024096385547e-06,
|
||
|
|
"loss": -0.0125,
|
||
|
|
"num_tokens": 262050600.0,
|
||
|
|
"reward": 1.0042026162147522,
|
||
|
|
"reward_std": 0.13399964123964309,
|
||
|
|
"rewards/accuracy_reward": 0.6789930582046508,
|
||
|
|
"rewards/brier_reward": 0.7894548892974853,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9230864763259887,
|
||
|
|
"rewards/format_reward": 0.9845486044883728,
|
||
|
|
"rewards/frontier_coverage_0": 0.007614323310554028,
|
||
|
|
"rewards/frontier_coverage_1": 0.007614323310554028,
|
||
|
|
"rewards/frontier_coverage_10": 0.007986792828887701,
|
||
|
|
"rewards/frontier_coverage_15": 0.009333854354918004,
|
||
|
|
"rewards/frontier_coverage_20": 0.01230423217639327,
|
||
|
|
"rewards/frontier_coverage_25": 0.02976319268345833,
|
||
|
|
"rewards/frontier_coverage_5": 0.007735726609826088,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15657551884651183,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.20762513875961303,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07828775942325591,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07828775942325591,
|
||
|
|
"signal/advantage_abs_mean": 0.09784245938062668,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09784245938062668,
|
||
|
|
"signal/advantage_pre_scale_std": 0.17648713588714598,
|
||
|
|
"signal/advantage_std": 0.17648713588714598,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12497896701097488,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16387878954410554,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012497896514832973,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012497896514832973,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04354229345917702,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06585515961050988,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004354229662567377,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004354229662567377,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02369791679084301,
|
||
|
|
"signal/format_reward/group_std_mean": 0.04182791784405708,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8361111164093018,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011848958395421504,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011848958395421504,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.06305849850177765,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.08656549751758576,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009017364820465446,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009017364820465446,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.06305849850177765,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.08656549751758576,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009017364820465446,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009017364820465446,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.06107858419418335,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.08405377566814423,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0008734237751923501,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008734237751923501,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.05693260729312897,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07869968116283417,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008141362806782127,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008141362806782127,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.041933455318212506,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05869346261024475,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005996484076604247,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005996484076604247,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04046922326087952,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.053788629919290544,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005787098547443747,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005787098547443747,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.06229802295565605,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.08560123592615128,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008908617543056607,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008908617543056607,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2082183569339026,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8211991327882566,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.7568997286321094,
|
||
|
|
"calibration/confidence_entropy": 0.5527962206752399,
|
||
|
|
"calibration/coverage@0%": 0.022027035701369612,
|
||
|
|
"calibration/coverage@1%": 0.022027035701369612,
|
||
|
|
"calibration/coverage@10%": 0.29937981558557936,
|
||
|
|
"calibration/coverage@15%": 0.3620036508586016,
|
||
|
|
"calibration/coverage@20%": 0.4019900773574,
|
||
|
|
"calibration/coverage@25%": 0.6806566429667174,
|
||
|
|
"calibration/coverage@30%": 0.7411075469279128,
|
||
|
|
"calibration/coverage@5%": 0.08646273123942735,
|
||
|
|
"calibration/ece": 0.12462517933287802,
|
||
|
|
"calibration/mean_confidence": 0.6664884744206967,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.014496527777777768,
|
||
|
|
"completions/max_length": 3502.2,
|
||
|
|
"completions/max_terminated_length": 3502.2,
|
||
|
|
"completions/mean_length": 670.2757080078125,
|
||
|
|
"completions/mean_terminated_length": 680.2117797851563,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 183.4,
|
||
|
|
"epoch": 0.2999962500468744,
|
||
|
|
"grad_norm": 0.0004161894030403346,
|
||
|
|
"learning_rate": 2.5e-06,
|
||
|
|
"loss": -0.0132,
|
||
|
|
"num_tokens": 272889840.0,
|
||
|
|
"reward": 1.005896532535553,
|
||
|
|
"reward_std": 0.12880902737379074,
|
||
|
|
"rewards/accuracy_reward": 0.6809027791023254,
|
||
|
|
"rewards/brier_reward": 0.7915880799293518,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9347058057785034,
|
||
|
|
"rewards/format_reward": 0.9855034708976745,
|
||
|
|
"rewards/frontier_coverage_0": -0.006131393508985639,
|
||
|
|
"rewards/frontier_coverage_1": -0.006131393508985639,
|
||
|
|
"rewards/frontier_coverage_10": -0.004654986085370183,
|
||
|
|
"rewards/frontier_coverage_15": -0.0017717648821417241,
|
||
|
|
"rewards/frontier_coverage_20": 0.002877543866634369,
|
||
|
|
"rewards/frontier_coverage_25": 0.02585282623767853,
|
||
|
|
"rewards/frontier_coverage_5": -0.005564809101633728,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15927734076976777,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.20954229235649108,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07963867038488388,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07963867038488388,
|
||
|
|
"signal/advantage_abs_mean": 0.0956741526722908,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0956741526722908,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16752077937126159,
|
||
|
|
"signal/advantage_std": 0.16752077937126159,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.11655332297086715,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1505295991897583,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011655332706868648,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011655332706868648,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04199672415852547,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06219554841518402,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004199672443792224,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004199672443792224,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.023106553591787815,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03924813717603683,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8499999880790711,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011553276795893908,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011553276795893908,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.10545411854982376,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.13934148699045182,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001507993880659342,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001507993880659342,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10545411854982376,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13934148699045182,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001507993880659342,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001507993880659342,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10189146101474762,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1348419487476349,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014570478349924087,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014570478349924087,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09459190368652344,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12573918104171752,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013526642229408025,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013526642229408025,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07673133313655853,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1030562549829483,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010972580406814814,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010972580406814814,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04959097653627396,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06758146658539772,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007091509876772761,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007091509876772761,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10424444675445557,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1378079980611801,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014906955417245626,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014906955417245626,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.26075673968720536,
|
||
|
|
"calibration/batch_distribution_entropy": 0.874285034631829,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8284399180154496,
|
||
|
|
"calibration/confidence_entropy": 0.5473131223715086,
|
||
|
|
"calibration/coverage@0%": 0.024374274739239032,
|
||
|
|
"calibration/coverage@1%": 0.024374274739239032,
|
||
|
|
"calibration/coverage@10%": 0.13814292939717276,
|
||
|
|
"calibration/coverage@15%": 0.2927289784951862,
|
||
|
|
"calibration/coverage@20%": 0.3928601633049443,
|
||
|
|
"calibration/coverage@25%": 0.46272417076934397,
|
||
|
|
"calibration/coverage@30%": 0.6714604813734013,
|
||
|
|
"calibration/coverage@5%": 0.04966937264230805,
|
||
|
|
"calibration/ece": 0.12237342404558275,
|
||
|
|
"calibration/mean_confidence": 0.626171849921876,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.021440972222222254,
|
||
|
|
"completions/max_length": 3600.2,
|
||
|
|
"completions/max_terminated_length": 3600.2,
|
||
|
|
"completions/mean_length": 685.9378540039063,
|
||
|
|
"completions/mean_terminated_length": 701.0846801757813,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 168.2,
|
||
|
|
"epoch": 0.3119961000487494,
|
||
|
|
"grad_norm": 0.0004247704928275198,
|
||
|
|
"learning_rate": 2.349397590361446e-06,
|
||
|
|
"loss": -0.0169,
|
||
|
|
"num_tokens": 283916644.0,
|
||
|
|
"reward": 0.9914659857749939,
|
||
|
|
"reward_std": 0.14033911675214766,
|
||
|
|
"rewards/accuracy_reward": 0.6625868201255798,
|
||
|
|
"rewards/brier_reward": 0.7736161351203918,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9349322438240051,
|
||
|
|
"rewards/format_reward": 0.9785590291023254,
|
||
|
|
"rewards/frontier_coverage_0": -0.010037094075232744,
|
||
|
|
"rewards/frontier_coverage_1": -0.010037094075232744,
|
||
|
|
"rewards/frontier_coverage_10": -0.008441044599749148,
|
||
|
|
"rewards/frontier_coverage_15": -0.00335610918700695,
|
||
|
|
"rewards/frontier_coverage_20": 0.008132204459980131,
|
||
|
|
"rewards/frontier_coverage_25": 0.03618508372455835,
|
||
|
|
"rewards/frontier_coverage_5": -0.009772640746086836,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17634005844593048,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.23339370787143707,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3388888895511627,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08817002922296524,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08817002922296524,
|
||
|
|
"signal/advantage_abs_mean": 0.10400059223175048,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.10400059223175048,
|
||
|
|
"signal/advantage_pre_scale_std": 0.17410335540771485,
|
||
|
|
"signal/advantage_std": 0.17410335540771485,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12626205682754515,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16308861076831818,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012626205757260322,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012626205757260322,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04283556342124939,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06413544788956642,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0042835562489926815,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042835562489926815,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.027652995474636555,
|
||
|
|
"signal/format_reward/group_std_mean": 0.045563656091690066,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8277777910232544,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013826497737318278,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013826497737318278,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13005276918411254,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17312212884426117,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001859754603356123,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001859754603356123,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13005276918411254,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17312212884426117,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001859754603356123,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001859754603356123,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12581277936697005,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16772884130477905,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017991228029131888,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017991228029131888,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11518070250749587,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1540255665779114,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016470840433612465,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016470840433612465,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08615544736385346,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11641546934843064,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012320228852331638,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012320228852331638,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05977813303470612,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07970146983861923,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008548272890038788,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008548272890038788,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12958541363477707,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1725340485572815,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001853071292862296,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001853071292862296,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2546360563314921,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8330176576430549,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8734325642612234,
|
||
|
|
"calibration/confidence_entropy": 0.4818107536798132,
|
||
|
|
"calibration/coverage@0%": 0.014179309586631486,
|
||
|
|
"calibration/coverage@1%": 0.014179309586631486,
|
||
|
|
"calibration/coverage@10%": 0.23271767810026386,
|
||
|
|
"calibration/coverage@15%": 0.26649076517150394,
|
||
|
|
"calibration/coverage@20%": 0.433201793135017,
|
||
|
|
"calibration/coverage@25%": 0.5304986738889623,
|
||
|
|
"calibration/coverage@30%": 0.5728792386450393,
|
||
|
|
"calibration/coverage@5%": 0.1934078166226913,
|
||
|
|
"calibration/ece": 0.1538258270193908,
|
||
|
|
"calibration/mean_confidence": 0.7085402165749761,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.011892361111111093,
|
||
|
|
"completions/max_length": 3711.6,
|
||
|
|
"completions/max_terminated_length": 3711.6,
|
||
|
|
"completions/mean_length": 664.8523559570312,
|
||
|
|
"completions/mean_terminated_length": 672.9011596679687,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 161.8,
|
||
|
|
"epoch": 0.32399595005062437,
|
||
|
|
"grad_norm": 0.000490625505335629,
|
||
|
|
"learning_rate": 2.1987951807228917e-06,
|
||
|
|
"loss": -0.0101,
|
||
|
|
"num_tokens": 294668767.0,
|
||
|
|
"reward": 1.0130939960479737,
|
||
|
|
"reward_std": 0.1343725234270096,
|
||
|
|
"rewards/accuracy_reward": 0.6880208373069763,
|
||
|
|
"rewards/brier_reward": 0.7956640720367432,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9440826296806335,
|
||
|
|
"rewards/format_reward": 0.9880208373069763,
|
||
|
|
"rewards/frontier_coverage_0": -0.0007627993822097778,
|
||
|
|
"rewards/frontier_coverage_1": -0.0007627993822097778,
|
||
|
|
"rewards/frontier_coverage_10": 0.0009573293849825859,
|
||
|
|
"rewards/frontier_coverage_15": 0.004394118906930089,
|
||
|
|
"rewards/frontier_coverage_20": 0.015732752112671732,
|
||
|
|
"rewards/frontier_coverage_25": 0.05771690420806408,
|
||
|
|
"rewards/frontier_coverage_5": -0.000457253772765398,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16697049140930176,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.223639115691185,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35555556416511536,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08348524570465088,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08348524570465088,
|
||
|
|
"signal/advantage_abs_mean": 0.09535450786352158,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09535450786352158,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16756429374217988,
|
||
|
|
"signal/advantage_std": 0.16756429374217988,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12386199980974197,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16375071704387664,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012386200204491615,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012386200204491615,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.035422375053167345,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.059968823194503786,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00354223744943738,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00354223744943738,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02140842005610466,
|
||
|
|
"signal/format_reward/group_std_mean": 0.04312895014882088,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8166666626930237,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01070421002805233,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01070421002805233,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.11899998188018798,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.15951877534389497,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001701699779368937,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001701699779368937,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11899998188018798,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15951877534389497,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001701699779368937,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001701699779368937,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11335770487785339,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1522216647863388,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001621015160344541,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001621015160344541,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10240471959114075,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13803330510854722,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014643874485045672,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014643874485045672,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0672150082886219,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0915078029036522,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009611746412701905,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009611746412701905,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06437275260686874,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08481907844543457,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009205303387716413,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009205303387716413,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11798569560050964,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15821486115455627,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00168719538487494,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00168719538487494,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.1854829328737862,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7996682424965277,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8905462538846519,
|
||
|
|
"calibration/confidence_entropy": 0.41854264390855284,
|
||
|
|
"calibration/coverage@0%": 0.04533180574298995,
|
||
|
|
"calibration/coverage@1%": 0.04533180574298995,
|
||
|
|
"calibration/coverage@10%": 0.24171738663204975,
|
||
|
|
"calibration/coverage@15%": 0.3580652283340696,
|
||
|
|
"calibration/coverage@20%": 0.5067775705274844,
|
||
|
|
"calibration/coverage@25%": 0.7254039905217915,
|
||
|
|
"calibration/coverage@30%": 0.956267313498989,
|
||
|
|
"calibration/coverage@5%": 0.12600271378231903,
|
||
|
|
"calibration/ece": 0.12841547965995068,
|
||
|
|
"calibration/mean_confidence": 0.7517699707017254,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.013541666666666674,
|
||
|
|
"completions/max_length": 3653.8,
|
||
|
|
"completions/max_terminated_length": 3653.8,
|
||
|
|
"completions/mean_length": 649.6119018554688,
|
||
|
|
"completions/mean_terminated_length": 658.50859375,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 165.4,
|
||
|
|
"epoch": 0.33599580005249935,
|
||
|
|
"grad_norm": 0.0004042711516376585,
|
||
|
|
"learning_rate": 2.0481927710843377e-06,
|
||
|
|
"loss": -0.0109,
|
||
|
|
"num_tokens": 305256520.0,
|
||
|
|
"reward": 1.0060909271240235,
|
||
|
|
"reward_std": 0.12792308628559113,
|
||
|
|
"rewards/accuracy_reward": 0.6741319417953491,
|
||
|
|
"rewards/brier_reward": 0.7920880436897277,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9383025646209717,
|
||
|
|
"rewards/format_reward": 0.9864583373069763,
|
||
|
|
"rewards/frontier_coverage_0": 0.014161212788894772,
|
||
|
|
"rewards/frontier_coverage_1": 0.014161212788894772,
|
||
|
|
"rewards/frontier_coverage_10": 0.014498895592987537,
|
||
|
|
"rewards/frontier_coverage_15": 0.014886665157973766,
|
||
|
|
"rewards/frontier_coverage_20": 0.027863727882504463,
|
||
|
|
"rewards/frontier_coverage_25": 0.09292519390583039,
|
||
|
|
"rewards/frontier_coverage_5": 0.014280988043174148,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15073784589767455,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2007976531982422,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07536892294883728,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07536892294883728,
|
||
|
|
"signal/advantage_abs_mean": 0.09263349771499634,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09263349771499634,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1665874868631363,
|
||
|
|
"signal/advantage_std": 0.1665874868631363,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12627332657575607,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16518832445144654,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012627332285046578,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012627332285046578,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037095585465431215,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06054994612932205,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003709558630362153,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003709558630362153,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.021560330502688886,
|
||
|
|
"signal/format_reward/group_std_mean": 0.041833048313856126,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8194444417953491,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010780165251344443,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010780165251344443,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.09921992719173431,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.13895856738090515,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014188449829816818,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014188449829816818,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09921992719173431,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13895856738090515,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014188449829816818,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014188449829816818,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09619618356227874,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13501449525356293,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013756054220721125,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013756054220721125,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08267004191875457,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1172051951289177,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011821816442534328,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011821816442534328,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.049167075753211976,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06892770677804946,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000703089137095958,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000703089137095958,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08086840957403182,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10296626091003418,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011564183048903942,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011564183048903942,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09877839088439941,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.13836795836687088,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001412531011737883,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001412531011737883,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.1974786325318561,
|
||
|
|
"calibration/batch_distribution_entropy": 0.807348167009032,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8525369126779999,
|
||
|
|
"calibration/confidence_entropy": 0.41936921197900173,
|
||
|
|
"calibration/coverage@0%": 0.020484134148034995,
|
||
|
|
"calibration/coverage@1%": 0.020484134148034995,
|
||
|
|
"calibration/coverage@10%": 0.12842496038935974,
|
||
|
|
"calibration/coverage@15%": 0.3236565417377872,
|
||
|
|
"calibration/coverage@20%": 0.635893543720149,
|
||
|
|
"calibration/coverage@25%": 0.7277863040288939,
|
||
|
|
"calibration/coverage@30%": 0.8788835787859582,
|
||
|
|
"calibration/coverage@5%": 0.055793119011248435,
|
||
|
|
"calibration/ece": 0.12143313418001216,
|
||
|
|
"calibration/mean_confidence": 0.741192283313203,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.01067708333333337,
|
||
|
|
"completions/max_length": 3261.6,
|
||
|
|
"completions/max_terminated_length": 3261.6,
|
||
|
|
"completions/mean_length": 632.3050537109375,
|
||
|
|
"completions/mean_terminated_length": 639.30107421875,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 182.4,
|
||
|
|
"epoch": 0.34799565005437433,
|
||
|
|
"grad_norm": 0.00040693863411433995,
|
||
|
|
"learning_rate": 1.8975903614457832e-06,
|
||
|
|
"loss": -0.0105,
|
||
|
|
"num_tokens": 315605282.0,
|
||
|
|
"reward": 1.0286210775375366,
|
||
|
|
"reward_std": 0.11897408664226532,
|
||
|
|
"rewards/accuracy_reward": 0.7124131917953491,
|
||
|
|
"rewards/brier_reward": 0.8128675818443298,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9287760734558106,
|
||
|
|
"rewards/format_reward": 0.9893229246139527,
|
||
|
|
"rewards/frontier_coverage_0": 0.01579418806359172,
|
||
|
|
"rewards/frontier_coverage_1": 0.01579418806359172,
|
||
|
|
"rewards/frontier_coverage_10": 0.016339881264138968,
|
||
|
|
"rewards/frontier_coverage_15": 0.01718453587964177,
|
||
|
|
"rewards/frontier_coverage_20": 0.03750094771385193,
|
||
|
|
"rewards/frontier_coverage_25": 0.1325998529791832,
|
||
|
|
"rewards/frontier_coverage_5": 0.015739528834819792,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.137353515625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.18777381181716918,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4361111164093018,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686767578125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0686767578125,
|
||
|
|
"signal/advantage_abs_mean": 0.08469511717557907,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.08469511717557907,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1597517877817154,
|
||
|
|
"signal/advantage_std": 0.1597517877817154,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12741477489471437,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16703042685985564,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012741477787494659,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012741477787494659,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04068734273314476,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05980287864804268,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004068734264001251,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004068734264001251,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.018288845382630824,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03258528374135494,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8750000119209289,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009144422691315412,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009144422691315412,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.08689655661582947,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.11893046051263809,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00124262070748955,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00124262070748955,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08689655661582947,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11893046051263809,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00124262070748955,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00124262070748955,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08449746146798134,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11587611138820648,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012083137058652937,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012083137058652937,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07269451022148132,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1005746454000473,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010395315941423178,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010395315941423178,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04770020917057991,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0634695328772068,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0006821130053140223,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006821130053140223,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08773275762796402,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11324829757213592,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012545783771201967,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012545783771201967,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08658337146043778,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11853417456150055,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012381422566249967,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012381422566249967,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2009286800964662,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8164642374302046,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8201398178798591,
|
||
|
|
"calibration/confidence_entropy": 0.4014055039722427,
|
||
|
|
"calibration/coverage@0%": 0.010666666666666668,
|
||
|
|
"calibration/coverage@1%": 0.010666666666666668,
|
||
|
|
"calibration/coverage@10%": 0.36389817290552584,
|
||
|
|
"calibration/coverage@15%": 0.4667266934046346,
|
||
|
|
"calibration/coverage@20%": 0.5943920900178253,
|
||
|
|
"calibration/coverage@25%": 0.6405080213903743,
|
||
|
|
"calibration/coverage@30%": 0.6833778966131907,
|
||
|
|
"calibration/coverage@5%": 0.09828333333333332,
|
||
|
|
"calibration/ece": 0.15416181123167888,
|
||
|
|
"calibration/mean_confidence": 0.7122128912211364,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.009722222222222233,
|
||
|
|
"completions/max_length": 3621.2,
|
||
|
|
"completions/max_terminated_length": 3621.2,
|
||
|
|
"completions/mean_length": 713.7203979492188,
|
||
|
|
"completions/mean_terminated_length": 720.7264038085938,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 178.2,
|
||
|
|
"epoch": 0.3599955000562493,
|
||
|
|
"grad_norm": 0.0004712261143140495,
|
||
|
|
"learning_rate": 1.7469879518072292e-06,
|
||
|
|
"loss": -0.0083,
|
||
|
|
"num_tokens": 326937677.0,
|
||
|
|
"reward": 1.0151524186134337,
|
||
|
|
"reward_std": 0.1302838146686554,
|
||
|
|
"rewards/accuracy_reward": 0.6858506917953491,
|
||
|
|
"rewards/brier_reward": 0.8010693430900574,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9333727955818176,
|
||
|
|
"rewards/format_reward": 0.9902777671813965,
|
||
|
|
"rewards/frontier_coverage_0": 0.021950625255703925,
|
||
|
|
"rewards/frontier_coverage_1": 0.021950625255703925,
|
||
|
|
"rewards/frontier_coverage_10": 0.022064855322241783,
|
||
|
|
"rewards/frontier_coverage_15": 0.022844681143760683,
|
||
|
|
"rewards/frontier_coverage_20": 0.03186333496123552,
|
||
|
|
"rewards/frontier_coverage_25": 0.11219749450683594,
|
||
|
|
"rewards/frontier_coverage_5": 0.021950625255703925,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16527235209941865,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.21440712809562684,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08263617604970933,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08263617604970933,
|
||
|
|
"signal/advantage_abs_mean": 0.09583060741424561,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09583060741424561,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1677115947008133,
|
||
|
|
"signal/advantage_std": 0.1677115947008133,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1361823335289955,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17782002985477446,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013618233613669872,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013618233613669872,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039950243383646014,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06207837164402008,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003995024506002665,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003995024506002665,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.01697048614732921,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03419107310473919,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008485243073664606,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008485243073664606,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1078558087348938,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.15047508776187896,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015423380769789218,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015423380769789218,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1078558087348938,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15047508776187896,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015423380769789218,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015423380769789218,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10575756281614304,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1477883592247963,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015123330289497972,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015123330289497972,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09201570004224777,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12966947257518768,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013158244779333471,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013158244779333471,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05600855126976967,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07790684998035431,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008009222452528775,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008009222452528775,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08503240048885345,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1091775730252266,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012159633450210094,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012159633450210094,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1078558087348938,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15047508776187896,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015423380769789218,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015423380769789218,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3599955000562493,
|
||
|
|
"eval_calibration/aurc": 0.1497119816911291,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.7968346178805156,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.8407002187440905,
|
||
|
|
"eval_calibration/confidence_entropy": 0.4125144127540233,
|
||
|
|
"eval_calibration/coverage@0%": 0.20950940860215053,
|
||
|
|
"eval_calibration/coverage@1%": 0.20950940860215053,
|
||
|
|
"eval_calibration/coverage@10%": 0.43934811827956993,
|
||
|
|
"eval_calibration/coverage@15%": 0.5816532258064516,
|
||
|
|
"eval_calibration/coverage@20%": 0.6876680107526881,
|
||
|
|
"eval_calibration/coverage@25%": 0.8776881720430106,
|
||
|
|
"eval_calibration/coverage@30%": 0.9786626344086021,
|
||
|
|
"eval_calibration/coverage@5%": 0.2824260752688172,
|
||
|
|
"eval_calibration/ece": 0.16998011964569013,
|
||
|
|
"eval_calibration/mean_confidence": 0.7282188050854926,
|
||
|
|
"eval_completions/clipped_ratio": 0.008680555555555561,
|
||
|
|
"eval_completions/max_length": 2416.5,
|
||
|
|
"eval_completions/max_terminated_length": 2416.5,
|
||
|
|
"eval_completions/mean_length": 695.3529561360677,
|
||
|
|
"eval_completions/mean_terminated_length": 701.4133707682291,
|
||
|
|
"eval_completions/min_length": 54.666666666666664,
|
||
|
|
"eval_completions/min_terminated_length": 227.16666666666666,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 326937677.0,
|
||
|
|
"eval_reward": 1.0095117688179016,
|
||
|
|
"eval_reward_std": 0.24901040395100912,
|
||
|
|
"eval_rewards/accuracy_reward": 0.6796875099341074,
|
||
|
|
"eval_rewards/brier_reward": 0.8043731153011322,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8918871482213339,
|
||
|
|
"eval_rewards/format_reward": 0.9913194477558136,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.034088116294393934,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.034088116294393934,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.03365040601541599,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.033414963788042464,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.037281897539893784,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.0998396414021651,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.034088116294393934,
|
||
|
|
"eval_runtime": 190.7746,
|
||
|
|
"eval_samples_per_second": 5.242,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4189995676279068,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4633843054374059,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2094997838139534,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2094997838139534,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.2170354425907135,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2170354425907135,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.24843567858139673,
|
||
|
|
"eval_signal/advantage_std": 0.24843567858139673,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2193461755911509,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.27922573685646057,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02193461824208498,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02193461824208498,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04991401235262553,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07574755760530631,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0049914012585456176,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0049914012585456176,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.016493055348594982,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.04259948432445526,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.008246527674297491,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.008246527674297491,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.1894952729344368,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.30272159973780316,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027097822166979313,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027097822166979313,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1894952729344368,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.30272159973780316,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027097822166979313,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027097822166979313,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.18484538545211157,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.29638702670733136,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026432890444993973,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026432890444993973,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.15993489821751913,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.26100187251965207,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022870690639441213,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022870690639441213,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.08387432868282,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.1353093981742859,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011994028852010767,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011994028852010767,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.13570485015710196,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.16785304248332977,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019405794446356595,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019405794446356595,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1894952729344368,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.30272159973780316,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027097822166979313,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027097822166979313,
|
||
|
|
"eval_steps_per_second": 0.031,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.17029982830875182,
|
||
|
|
"calibration/batch_distribution_entropy": 0.828323867142758,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8372604232456323,
|
||
|
|
"calibration/confidence_entropy": 0.40143344705114015,
|
||
|
|
"calibration/coverage@0%": 0.002617801047120419,
|
||
|
|
"calibration/coverage@1%": 0.002617801047120419,
|
||
|
|
"calibration/coverage@10%": 0.5048941513727101,
|
||
|
|
"calibration/coverage@15%": 0.5596638265431797,
|
||
|
|
"calibration/coverage@20%": 0.6364728575365114,
|
||
|
|
"calibration/coverage@25%": 0.7473739322127307,
|
||
|
|
"calibration/coverage@30%": 0.8836594103058694,
|
||
|
|
"calibration/coverage@5%": 0.002617801047120419,
|
||
|
|
"calibration/ece": 0.13330443000274134,
|
||
|
|
"calibration/mean_confidence": 0.7272123755133635,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.008940972222222232,
|
||
|
|
"completions/max_length": 3450.4,
|
||
|
|
"completions/max_terminated_length": 3450.4,
|
||
|
|
"completions/mean_length": 683.6056518554688,
|
||
|
|
"completions/mean_terminated_length": 689.88115234375,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 187.0,
|
||
|
|
"epoch": 0.3719953500581243,
|
||
|
|
"grad_norm": 0.00048128137132152915,
|
||
|
|
"learning_rate": 1.5963855421686747e-06,
|
||
|
|
"loss": -0.0059,
|
||
|
|
"num_tokens": 337920526.0,
|
||
|
|
"reward": 1.0457221508026122,
|
||
|
|
"reward_std": 0.1301838055253029,
|
||
|
|
"rewards/accuracy_reward": 0.7413194417953491,
|
||
|
|
"rewards/brier_reward": 0.8202741265296936,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.942044448852539,
|
||
|
|
"rewards/format_reward": 0.9910590171813964,
|
||
|
|
"rewards/frontier_coverage_0": 0.0016106660943478346,
|
||
|
|
"rewards/frontier_coverage_1": 0.0016106660943478346,
|
||
|
|
"rewards/frontier_coverage_10": 0.002487003430724144,
|
||
|
|
"rewards/frontier_coverage_15": 0.008382186014205217,
|
||
|
|
"rewards/frontier_coverage_20": 0.040102506056427956,
|
||
|
|
"rewards/frontier_coverage_25": 0.17503876686096193,
|
||
|
|
"rewards/frontier_coverage_5": 0.0016106660943478346,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16593966782093048,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.21918415725231172,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08296983391046524,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08296983391046524,
|
||
|
|
"signal/advantage_abs_mean": 0.09460719525814057,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09460719525814057,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1647391140460968,
|
||
|
|
"signal/advantage_std": 0.1647391140460968,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13260589838027953,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17412539422512055,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013260589353740216,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013260589353740216,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033828570321202275,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05308753773570061,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003382857143878937,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003382857143878937,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.015771484561264516,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03148765973746777,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8638889074325562,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007885742280632258,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007885742280632258,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.12180711925029755,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.16962958872318268,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017418418079614638,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017418418079614638,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12180711925029755,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16962958872318268,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017418418079614638,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017418418079614638,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11921639740467072,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16621364057064056,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017047945875674486,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017047945875674486,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0924990564584732,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13051227778196334,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001322736474685371,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001322736474685371,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05612751841545105,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07590975016355514,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008026235154829919,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008026235154829919,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11158772855997086,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14441257119178771,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015957045601680876,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015957045601680876,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12180711925029755,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16962958872318268,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017418418079614638,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017418418079614638,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.13742324374109022,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8046436261756036,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8454471239403271,
|
||
|
|
"calibration/confidence_entropy": 0.4209601948297349,
|
||
|
|
"calibration/coverage@0%": 0.030325589005235597,
|
||
|
|
"calibration/coverage@1%": 0.030325589005235597,
|
||
|
|
"calibration/coverage@10%": 0.6808016148896961,
|
||
|
|
"calibration/coverage@15%": 0.7603269147084422,
|
||
|
|
"calibration/coverage@20%": 0.781201044386423,
|
||
|
|
"calibration/coverage@25%": 0.8,
|
||
|
|
"calibration/coverage@30%": 0.8410526315789474,
|
||
|
|
"calibration/coverage@5%": 0.3801644545744061,
|
||
|
|
"calibration/ece": 0.12313236262405054,
|
||
|
|
"calibration/mean_confidence": 0.7447349905237983,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.010069444444444442,
|
||
|
|
"completions/max_length": 3256.0,
|
||
|
|
"completions/max_terminated_length": 3256.0,
|
||
|
|
"completions/mean_length": 697.6481811523438,
|
||
|
|
"completions/mean_terminated_length": 704.7627807617188,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 187.6,
|
||
|
|
"epoch": 0.38399520005999926,
|
||
|
|
"grad_norm": 0.00042031033081002533,
|
||
|
|
"learning_rate": 1.4457831325301204e-06,
|
||
|
|
"loss": -0.0081,
|
||
|
|
"num_tokens": 349044729.0,
|
||
|
|
"reward": 1.011973214149475,
|
||
|
|
"reward_std": 0.1254075601696968,
|
||
|
|
"rewards/accuracy_reward": 0.6768229246139527,
|
||
|
|
"rewards/brier_reward": 0.7973306894302368,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9424768567085267,
|
||
|
|
"rewards/format_reward": 0.98984375,
|
||
|
|
"rewards/frontier_coverage_0": 0.020558654330670834,
|
||
|
|
"rewards/frontier_coverage_1": 0.020558654330670834,
|
||
|
|
"rewards/frontier_coverage_10": 0.020932418294250965,
|
||
|
|
"rewards/frontier_coverage_15": 0.022469326481223108,
|
||
|
|
"rewards/frontier_coverage_20": 0.0480774313211441,
|
||
|
|
"rewards/frontier_coverage_25": 0.17265710532665252,
|
||
|
|
"rewards/frontier_coverage_5": 0.020558654330670834,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15957573652267457,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.20743002891540527,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07978786826133728,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07978786826133728,
|
||
|
|
"signal/advantage_abs_mean": 0.09231876432895661,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09231876432895661,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16247815191745757,
|
||
|
|
"signal/advantage_std": 0.16247815191745757,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13798875659704207,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17662995755672456,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013798876665532589,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013798876665532589,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033293415978550914,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05347738191485405,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003329341718927026,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003329341718927026,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.01769205741584301,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03484956584870815,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008846028707921505,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008846028707921505,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13067993819713591,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17672376036643983,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018687231000512837,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018687231000512837,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13067993819713591,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17672376036643983,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018687231000512837,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018687231000512837,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1259875252842903,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17063040137290955,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018016215413808822,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018016215413808822,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09146946370601654,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12508394569158554,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001308013335801661,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001308013335801661,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.058338577300310133,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07590894401073456,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008342416607774794,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008342416607774794,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11929452270269394,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.152734637260437,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001705911778844893,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001705911778844893,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13067993819713591,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17672376036643983,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018687231000512837,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018687231000512837,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.1656603853248881,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8566291969090212,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8535527032072778,
|
||
|
|
"calibration/confidence_entropy": 0.40443817859718545,
|
||
|
|
"calibration/coverage@0%": 0.017381097120307547,
|
||
|
|
"calibration/coverage@1%": 0.017381097120307547,
|
||
|
|
"calibration/coverage@10%": 0.49995407558530747,
|
||
|
|
"calibration/coverage@15%": 0.5949152537180693,
|
||
|
|
"calibration/coverage@20%": 0.6571557394509561,
|
||
|
|
"calibration/coverage@25%": 0.7211049372044049,
|
||
|
|
"calibration/coverage@30%": 0.8517150160293824,
|
||
|
|
"calibration/coverage@5%": 0.057801044626869226,
|
||
|
|
"calibration/ece": 0.11454567795901449,
|
||
|
|
"calibration/mean_confidence": 0.667435459092505,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.013107638888888884,
|
||
|
|
"completions/max_length": 3570.0,
|
||
|
|
"completions/max_terminated_length": 3570.0,
|
||
|
|
"completions/mean_length": 726.2890747070312,
|
||
|
|
"completions/mean_terminated_length": 736.1373657226562,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 189.6,
|
||
|
|
"epoch": 0.39599505006187424,
|
||
|
|
"grad_norm": 0.00045935352682136,
|
||
|
|
"learning_rate": 1.2951807228915664e-06,
|
||
|
|
"loss": -0.0097,
|
||
|
|
"num_tokens": 360550651.0,
|
||
|
|
"reward": 1.0085692167282105,
|
||
|
|
"reward_std": 0.12360656410455703,
|
||
|
|
"rewards/accuracy_reward": 0.6678819537162781,
|
||
|
|
"rewards/brier_reward": 0.8069667339324951,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9387310028076172,
|
||
|
|
"rewards/format_reward": 0.9868923664093018,
|
||
|
|
"rewards/frontier_coverage_0": 0.04505334049463272,
|
||
|
|
"rewards/frontier_coverage_1": 0.04505334049463272,
|
||
|
|
"rewards/frontier_coverage_10": 0.044467170163989066,
|
||
|
|
"rewards/frontier_coverage_15": 0.041498401761054994,
|
||
|
|
"rewards/frontier_coverage_20": 0.06552209258079529,
|
||
|
|
"rewards/frontier_coverage_25": 0.17571614384651185,
|
||
|
|
"rewards/frontier_coverage_5": 0.045087074488401414,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14957682192325591,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.20039042532444,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4194444417953491,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07478841096162796,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07478841096162796,
|
||
|
|
"signal/advantage_abs_mean": 0.09047110676765442,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09047110676765442,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16116170585155487,
|
||
|
|
"signal/advantage_std": 0.16116170585155487,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13711402416229249,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17915517389774321,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013711402378976344,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013711402378976344,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03691897690296173,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05648680925369263,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036918976344168185,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036918976344168185,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.020296223647892474,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03649218082427978,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010148111823946237,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010148111823946237,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13668433278799058,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1877914160490036,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019545859657227995,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019545859657227995,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13668433278799058,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1877914160490036,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019545859657227995,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019545859657227995,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13133783787488937,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18079141676425933,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018781311810016632,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018781311810016632,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08820350021123886,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12286703139543534,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012613100348971783,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012613100348971783,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.062378589808940885,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0826146811246872,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008920137654058636,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008920137654058636,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11272307336330414,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14559331238269807,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016119398642331362,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016119398642331362,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1365072175860405,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1875326693058014,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019520531874150037,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019520531874150037,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.13592004663092033,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7486099576692926,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8564281533478454,
|
||
|
|
"calibration/confidence_entropy": 0.37029655417641594,
|
||
|
|
"calibration/coverage@0%": 0.033480513444424695,
|
||
|
|
"calibration/coverage@1%": 0.07117684852295873,
|
||
|
|
"calibration/coverage@10%": 0.4261042923170245,
|
||
|
|
"calibration/coverage@15%": 0.6691992052004629,
|
||
|
|
"calibration/coverage@20%": 0.7751633169909933,
|
||
|
|
"calibration/coverage@25%": 0.8759031317778053,
|
||
|
|
"calibration/coverage@30%": 0.9725213090748339,
|
||
|
|
"calibration/coverage@5%": 0.1496657690046888,
|
||
|
|
"calibration/ece": 0.09613408015602516,
|
||
|
|
"calibration/mean_confidence": 0.7586426281695957,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.009548611111111138,
|
||
|
|
"completions/max_length": 3430.4,
|
||
|
|
"completions/max_terminated_length": 3430.4,
|
||
|
|
"completions/mean_length": 703.7692016601562,
|
||
|
|
"completions/mean_terminated_length": 710.5459350585937,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 186.0,
|
||
|
|
"epoch": 0.4079949000637492,
|
||
|
|
"grad_norm": 0.0004403712518978864,
|
||
|
|
"learning_rate": 1.1445783132530121e-06,
|
||
|
|
"loss": -0.0087,
|
||
|
|
"num_tokens": 371747256.0,
|
||
|
|
"reward": 1.0362069845199584,
|
||
|
|
"reward_std": 0.12289563715457916,
|
||
|
|
"rewards/accuracy_reward": 0.722569465637207,
|
||
|
|
"rewards/brier_reward": 0.8146629929542542,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9312249541282653,
|
||
|
|
"rewards/format_reward": 0.9904513955116272,
|
||
|
|
"rewards/frontier_coverage_0": 0.015381347015500068,
|
||
|
|
"rewards/frontier_coverage_1": 0.015381347015500068,
|
||
|
|
"rewards/frontier_coverage_10": 0.016636411473155022,
|
||
|
|
"rewards/frontier_coverage_15": 0.02498224622104317,
|
||
|
|
"rewards/frontier_coverage_20": 0.07068880349397659,
|
||
|
|
"rewards/frontier_coverage_25": 0.19831772446632384,
|
||
|
|
"rewards/frontier_coverage_5": 0.01580010838806629,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1472873270511627,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.19956836700439454,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07364366352558135,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07364366352558135,
|
||
|
|
"signal/advantage_abs_mean": 0.08684393763542175,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.08684393763542175,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1598696678876877,
|
||
|
|
"signal/advantage_std": 0.1598696678876877,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13169292807579042,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17282358705997466,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013169292360544205,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013169292360544205,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039322879165410995,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06056636646389961,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003932288242504,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003932288242504,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.01654730923473835,
|
||
|
|
"signal/format_reward/group_std_mean": 0.033677156642079355,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008273654617369175,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008273654617369175,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.11836729645729065,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.16220209896564483,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016926524229347705,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016926524229347705,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11836729645729065,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16220209896564483,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016926524229347705,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016926524229347705,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11148134768009185,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15295161604881286,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015941831981763244,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015941831981763244,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07999386936426163,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10980904847383499,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001143912342377007,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001143912342377007,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0641142837703228,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08305520564317703,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009168342221528292,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009168342221528292,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11506871432065964,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14938458502292634,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016454826574772597,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016454826574772597,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11742766797542573,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1609276831150055,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001679215719923377,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001679215719923377,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.13348262780460535,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8223084451849527,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8346515524361726,
|
||
|
|
"calibration/confidence_entropy": 0.3854238749390199,
|
||
|
|
"calibration/coverage@0%": 0.036539926699624505,
|
||
|
|
"calibration/coverage@1%": 0.036539926699624505,
|
||
|
|
"calibration/coverage@10%": 0.422825227026967,
|
||
|
|
"calibration/coverage@15%": 0.5584091115953049,
|
||
|
|
"calibration/coverage@20%": 0.8452149741820681,
|
||
|
|
"calibration/coverage@25%": 0.9379946198087186,
|
||
|
|
"calibration/coverage@30%": 0.9821740872231969,
|
||
|
|
"calibration/coverage@5%": 0.13151984499498745,
|
||
|
|
"calibration/ece": 0.0950542819703474,
|
||
|
|
"calibration/mean_confidence": 0.7023832506280211,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.012934027777777812,
|
||
|
|
"completions/max_length": 3636.2,
|
||
|
|
"completions/max_terminated_length": 3636.2,
|
||
|
|
"completions/mean_length": 736.4591186523437,
|
||
|
|
"completions/mean_terminated_length": 746.1770385742187,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 198.8,
|
||
|
|
"epoch": 0.4199947500656242,
|
||
|
|
"grad_norm": 0.00045567096094600856,
|
||
|
|
"learning_rate": 9.93975903614458e-07,
|
||
|
|
"loss": -0.0117,
|
||
|
|
"num_tokens": 383339233.0,
|
||
|
|
"reward": 1.0289367198944093,
|
||
|
|
"reward_std": 0.1303061842918396,
|
||
|
|
"rewards/accuracy_reward": 0.71171875,
|
||
|
|
"rewards/brier_reward": 0.8070045828819274,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9229098796844483,
|
||
|
|
"rewards/format_reward": 0.9870659708976746,
|
||
|
|
"rewards/frontier_coverage_0": 0.01920226626098156,
|
||
|
|
"rewards/frontier_coverage_1": 0.01920226626098156,
|
||
|
|
"rewards/frontier_coverage_10": 0.01852501593530178,
|
||
|
|
"rewards/frontier_coverage_15": 0.029568823985755444,
|
||
|
|
"rewards/frontier_coverage_20": 0.10787947475910187,
|
||
|
|
"rewards/frontier_coverage_25": 0.2446742206811905,
|
||
|
|
"rewards/frontier_coverage_5": 0.019195317476987838,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1561903178691864,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2148987740278244,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0780951589345932,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0780951589345932,
|
||
|
|
"signal/advantage_abs_mean": 0.0919778436422348,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0919778436422348,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16701272428035735,
|
||
|
|
"signal/advantage_std": 0.16701272428035735,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.14022985696792603,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.18417258262634278,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014022985659539699,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014022985659539699,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04660597518086433,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06790307313203811,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046605975832790135,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046605975832790135,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.02120768204331398,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03753828890621662,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8499999880790711,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01060384102165699,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01060384102165699,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13295696824789047,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18069115579128264,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019012847449630498,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019012847449630498,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13295696824789047,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18069115579128264,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019012847449630498,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019012847449630498,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11907797455787658,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16228229701519012,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017028149915859104,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017028149915859104,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07870914041996002,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10632596611976623,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00112554068909958,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00112554068909958,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08249068260192871,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10877174586057663,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011796167120337487,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011796167120337487,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1382613003253937,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1832427829504013,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019771367078647017,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019771367078647017,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13113310188055038,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17831478118896485,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018752032425254582,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018752032425254582,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.09345007181896194,
|
||
|
|
"calibration/batch_distribution_entropy": 0.801623303579988,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8262189453353539,
|
||
|
|
"calibration/confidence_entropy": 0.38592001304418866,
|
||
|
|
"calibration/coverage@0%": 0.08496741084676991,
|
||
|
|
"calibration/coverage@1%": 0.14393717568439587,
|
||
|
|
"calibration/coverage@10%": 0.6195064116269997,
|
||
|
|
"calibration/coverage@15%": 0.8078410920640862,
|
||
|
|
"calibration/coverage@20%": 0.9067183277342291,
|
||
|
|
"calibration/coverage@25%": 0.9640873460246361,
|
||
|
|
"calibration/coverage@30%": 0.9936842105263158,
|
||
|
|
"calibration/coverage@5%": 0.3787979947574124,
|
||
|
|
"calibration/ece": 0.09400609019464161,
|
||
|
|
"calibration/mean_confidence": 0.7386594045732192,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.011631944444444441,
|
||
|
|
"completions/max_length": 3725.4,
|
||
|
|
"completions/max_terminated_length": 3725.4,
|
||
|
|
"completions/mean_length": 705.6876831054688,
|
||
|
|
"completions/mean_terminated_length": 713.9794677734375,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 216.8,
|
||
|
|
"epoch": 0.4319946000674992,
|
||
|
|
"grad_norm": 0.0004313621611800045,
|
||
|
|
"learning_rate": 8.433734939759036e-07,
|
||
|
|
"loss": -0.0093,
|
||
|
|
"num_tokens": 394568723.0,
|
||
|
|
"reward": 1.0281787872314454,
|
||
|
|
"reward_std": 0.12547616958618163,
|
||
|
|
"rewards/accuracy_reward": 0.7125000119209289,
|
||
|
|
"rewards/brier_reward": 0.7898874640464782,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9186937093734742,
|
||
|
|
"rewards/format_reward": 0.9881944537162781,
|
||
|
|
"rewards/frontier_coverage_0": 0.010317787062376738,
|
||
|
|
"rewards/frontier_coverage_1": 0.010276203881949187,
|
||
|
|
"rewards/frontier_coverage_10": 0.013230977579951286,
|
||
|
|
"rewards/frontier_coverage_15": 0.0318543815985322,
|
||
|
|
"rewards/frontier_coverage_20": 0.12444168329238892,
|
||
|
|
"rewards/frontier_coverage_25": 0.2868518948554993,
|
||
|
|
"rewards/frontier_coverage_5": 0.010678381472826005,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15108506977558137,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1995271176099777,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07554253488779068,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07554253488779068,
|
||
|
|
"signal/advantage_abs_mean": 0.09149419367313386,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09149419367313386,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16792958378791809,
|
||
|
|
"signal/advantage_std": 0.16792958378791809,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.140881010890007,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1828139305114746,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01408810093998909,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01408810093998909,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04758927077054977,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06954466402530671,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004758927039802074,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004758927039802074,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.019726562313735485,
|
||
|
|
"signal/format_reward/group_std_mean": 0.037197813764214514,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009863281156867743,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009863281156867743,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.12390959560871125,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17109252214431764,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017719071358442307,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017719071358442307,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12380760312080383,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17095208466053008,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017704485915601253,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017704485915601253,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10980342477560043,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1521240144968033,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015701889526098967,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015701889526098967,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0799461305141449,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10864663273096084,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011432296945713461,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011432296945713461,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08835995942354202,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11567166298627854,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001263547409325838,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001263547409325838,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16603110134601592,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21807546317577362,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023742446210235357,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023742446210235357,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1223075494170189,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16897510588169098,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017489979742094874,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017489979742094874,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.17122914709216733,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7880253868471856,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8186282369699118,
|
||
|
|
"calibration/confidence_entropy": 0.3642710142144591,
|
||
|
|
"calibration/coverage@0%": 0.018777247401564758,
|
||
|
|
"calibration/coverage@1%": 0.018777247401564758,
|
||
|
|
"calibration/coverage@10%": 0.1579079193038335,
|
||
|
|
"calibration/coverage@15%": 0.5548855949538294,
|
||
|
|
"calibration/coverage@20%": 0.8632928767984499,
|
||
|
|
"calibration/coverage@25%": 0.9036374379904082,
|
||
|
|
"calibration/coverage@30%": 0.9350785340314136,
|
||
|
|
"calibration/coverage@5%": 0.018777247401564758,
|
||
|
|
"calibration/ece": 0.14703350637447332,
|
||
|
|
"calibration/mean_confidence": 0.7192671705590277,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.014843750000000022,
|
||
|
|
"completions/max_length": 3571.8,
|
||
|
|
"completions/max_terminated_length": 3571.8,
|
||
|
|
"completions/mean_length": 706.883251953125,
|
||
|
|
"completions/mean_terminated_length": 717.6452026367188,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 183.0,
|
||
|
|
"epoch": 0.44399445006937416,
|
||
|
|
"grad_norm": 0.00048535055248066783,
|
||
|
|
"learning_rate": 6.927710843373495e-07,
|
||
|
|
"loss": -0.0105,
|
||
|
|
"num_tokens": 405802066.0,
|
||
|
|
"reward": 1.014847993850708,
|
||
|
|
"reward_std": 0.13151782006025314,
|
||
|
|
"rewards/accuracy_reward": 0.686718761920929,
|
||
|
|
"rewards/brier_reward": 0.7976317048072815,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.922605574131012,
|
||
|
|
"rewards/format_reward": 0.9850694537162781,
|
||
|
|
"rewards/frontier_coverage_0": 0.026550618838518857,
|
||
|
|
"rewards/frontier_coverage_1": 0.026589373406022788,
|
||
|
|
"rewards/frontier_coverage_10": 0.027571763657033444,
|
||
|
|
"rewards/frontier_coverage_15": 0.03620800599455833,
|
||
|
|
"rewards/frontier_coverage_20": 0.08736573904752731,
|
||
|
|
"rewards/frontier_coverage_25": 0.25345793068408967,
|
||
|
|
"rewards/frontier_coverage_5": 0.026884720474481583,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1610948324203491,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.21088041365146637,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08054741621017455,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08054741621017455,
|
||
|
|
"signal/advantage_abs_mean": 0.09806140363216401,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09806140363216401,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16944925785064696,
|
||
|
|
"signal/advantage_std": 0.16944925785064696,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.14560845494270325,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.18695748448371888,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014560846239328384,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014560846239328384,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045430511236190796,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06660099476575851,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00454305112361908,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00454305112361908,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0214952252805233,
|
||
|
|
"signal/format_reward/group_std_mean": 0.037768884748220447,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01074761264026165,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01074761264026165,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13311404585838318,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18202302753925323,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019035307923331857,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019035307923331857,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1330111652612686,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18189111053943635,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019020596519112587,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019020596519112587,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12245990186929703,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16796686351299286,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001751176593825221,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001751176593825221,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09790500402450561,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13461994379758835,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014000415336340665,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014000415336340665,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07675344049930573,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10184175372123719,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010975741781294346,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010975741781294346,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1587800681591034,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20560413897037505,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022705549374222754,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022705549374222754,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1313490241765976,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17971158623695374,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018782909493893385,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018782909493893385,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.18193512910285387,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7778199175476095,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8242070841531243,
|
||
|
|
"calibration/confidence_entropy": 0.3678612367693591,
|
||
|
|
"calibration/coverage@0%": 0.05865979043198364,
|
||
|
|
"calibration/coverage@1%": 0.05865979043198364,
|
||
|
|
"calibration/coverage@10%": 0.34553174257808716,
|
||
|
|
"calibration/coverage@15%": 0.38030753968253966,
|
||
|
|
"calibration/coverage@20%": 0.586945316731242,
|
||
|
|
"calibration/coverage@25%": 0.854896653543307,
|
||
|
|
"calibration/coverage@30%": 0.9696645341207348,
|
||
|
|
"calibration/coverage@5%": 0.2798431520853192,
|
||
|
|
"calibration/ece": 0.12763212133180607,
|
||
|
|
"calibration/mean_confidence": 0.7513209126914786,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00824652777777779,
|
||
|
|
"completions/max_length": 3541.6,
|
||
|
|
"completions/max_terminated_length": 3541.6,
|
||
|
|
"completions/mean_length": 699.0154541015625,
|
||
|
|
"completions/mean_terminated_length": 704.8328979492187,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 183.0,
|
||
|
|
"epoch": 0.45599430007124914,
|
||
|
|
"grad_norm": 0.00043432877282612026,
|
||
|
|
"learning_rate": 5.421686746987952e-07,
|
||
|
|
"loss": -0.0066,
|
||
|
|
"num_tokens": 416937668.0,
|
||
|
|
"reward": 1.0384333848953247,
|
||
|
|
"reward_std": 0.12415469735860825,
|
||
|
|
"rewards/accuracy_reward": 0.7216145873069764,
|
||
|
|
"rewards/brier_reward": 0.8176455974578858,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9341374635696411,
|
||
|
|
"rewards/format_reward": 0.9917534708976745,
|
||
|
|
"rewards/frontier_coverage_0": 0.02207240234129131,
|
||
|
|
"rewards/frontier_coverage_1": 0.022145295469090342,
|
||
|
|
"rewards/frontier_coverage_10": 0.023324301373213528,
|
||
|
|
"rewards/frontier_coverage_15": 0.0292608555406332,
|
||
|
|
"rewards/frontier_coverage_20": 0.07275687083601952,
|
||
|
|
"rewards/frontier_coverage_25": 0.2676252216100693,
|
||
|
|
"rewards/frontier_coverage_5": 0.0223290272988379,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16108398735523224,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2116878628730774,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08054199367761612,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08054199367761612,
|
||
|
|
"signal/advantage_abs_mean": 0.08960044384002686,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.08960044384002686,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16030249297618865,
|
||
|
|
"signal/advantage_std": 0.16030249297618865,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13036527633666992,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17200060486793517,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013036527484655381,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013036527484655381,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0354267667979002,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05462343618273735,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003542676754295826,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003542676754295826,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.01468641497194767,
|
||
|
|
"signal/format_reward/group_std_mean": 0.02957034520804882,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007343207485973835,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007343207485973835,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1278734177350998,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17466306388378144,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018285899190232159,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018285899190232159,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12777684330940248,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1745421200990677,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018272089073434472,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018272089073434472,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12328650057315826,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16869595050811767,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017629968700930477,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017629968700930477,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10532844662666321,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14432147443294524,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015061968471854926,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015061968471854926,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07010861709713936,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09235068708658219,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010025532450526954,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010025532450526954,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1480533003807068,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.19292950630187988,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002117162151262164,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002117162151262164,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12679695785045625,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17328327000141144,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018131964607164264,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018131964607164264,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.19111161916549027,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8201703152347986,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8106126533134101,
|
||
|
|
"calibration/confidence_entropy": 0.37700403449513026,
|
||
|
|
"calibration/coverage@0%": 0.015711122047244095,
|
||
|
|
"calibration/coverage@1%": 0.015711122047244095,
|
||
|
|
"calibration/coverage@10%": 0.27973864449795016,
|
||
|
|
"calibration/coverage@15%": 0.5170787035530683,
|
||
|
|
"calibration/coverage@20%": 0.5962166628706536,
|
||
|
|
"calibration/coverage@25%": 0.6621170432853951,
|
||
|
|
"calibration/coverage@30%": 0.7699052081299754,
|
||
|
|
"calibration/coverage@5%": 0.11544865485564304,
|
||
|
|
"calibration/ece": 0.14145817553412607,
|
||
|
|
"calibration/mean_confidence": 0.6937342965939054,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.012673611111111094,
|
||
|
|
"completions/max_length": 3786.0,
|
||
|
|
"completions/max_terminated_length": 3786.0,
|
||
|
|
"completions/mean_length": 717.1263916015625,
|
||
|
|
"completions/mean_terminated_length": 726.4796875,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 180.2,
|
||
|
|
"epoch": 0.46799415007312406,
|
||
|
|
"grad_norm": 0.0004493010346777737,
|
||
|
|
"learning_rate": 3.91566265060241e-07,
|
||
|
|
"loss": -0.0096,
|
||
|
|
"num_tokens": 428279828.0,
|
||
|
|
"reward": 1.008443033695221,
|
||
|
|
"reward_std": 0.12740874141454697,
|
||
|
|
"rewards/accuracy_reward": 0.6736111044883728,
|
||
|
|
"rewards/brier_reward": 0.7872533559799194,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9333649158477784,
|
||
|
|
"rewards/format_reward": 0.9872395873069764,
|
||
|
|
"rewards/frontier_coverage_0": 0.027697153389453888,
|
||
|
|
"rewards/frontier_coverage_1": 0.027658072859048845,
|
||
|
|
"rewards/frontier_coverage_10": 0.027347087673842908,
|
||
|
|
"rewards/frontier_coverage_15": 0.030936553701758386,
|
||
|
|
"rewards/frontier_coverage_20": 0.06847289353609085,
|
||
|
|
"rewards/frontier_coverage_25": 0.20654830634593963,
|
||
|
|
"rewards/frontier_coverage_5": 0.027833018451929092,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1546983540058136,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2061179220676422,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40555556416511535,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0773491770029068,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0773491770029068,
|
||
|
|
"signal/advantage_abs_mean": 0.09210169464349746,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.09210169464349746,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16345611214637756,
|
||
|
|
"signal/advantage_std": 0.16345611214637756,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.14198019355535507,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.18473336696624756,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014198019355535507,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014198019355535507,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03743585646152496,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.058041921257972716,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037435856182128193,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037435856182128193,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.019276259280741215,
|
||
|
|
"signal/format_reward/group_std_mean": 0.036431630700826646,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222447395324,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009638129640370608,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009638129640370608,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.12945400625467302,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1761310279369354,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001851192256435752,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001851192256435752,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12929727286100387,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17592544853687286,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018489510286599398,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018489510286599398,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12513509392738342,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17050479352474213,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001789431762881577,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001789431762881577,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10048199743032456,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13747784048318862,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014368925243616105,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014368925243616105,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07295912206172943,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09612232595682144,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010433154529891908,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010433154529891908,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13768279552459717,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18044605255126953,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001968864002265036,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001968864002265036,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12782656103372575,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17402253448963165,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018279198091477155,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018279198091477155,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.17856991685454757,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7682419787958594,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8208414222154016,
|
||
|
|
"calibration/confidence_entropy": 0.3823009379147786,
|
||
|
|
"calibration/coverage@0%": 0.012575064242624123,
|
||
|
|
"calibration/coverage@1%": 0.012575064242624123,
|
||
|
|
"calibration/coverage@10%": 0.29729537286758967,
|
||
|
|
"calibration/coverage@15%": 0.43410561899657835,
|
||
|
|
"calibration/coverage@20%": 0.6063885338939963,
|
||
|
|
"calibration/coverage@25%": 0.8951074475065617,
|
||
|
|
"calibration/coverage@30%": 0.9317585301837269,
|
||
|
|
"calibration/coverage@5%": 0.17731190634788727,
|
||
|
|
"calibration/ece": 0.141153362219527,
|
||
|
|
"calibration/mean_confidence": 0.7661149606034827,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00859375,
|
||
|
|
"completions/max_length": 3273.2,
|
||
|
|
"completions/max_terminated_length": 3273.2,
|
||
|
|
"completions/mean_length": 702.9766479492188,
|
||
|
|
"completions/mean_terminated_length": 709.0731323242187,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 182.6,
|
||
|
|
"epoch": 0.47999400007499904,
|
||
|
|
"grad_norm": 0.0004507655103225261,
|
||
|
|
"learning_rate": 2.409638554216868e-07,
|
||
|
|
"loss": -0.006,
|
||
|
|
"num_tokens": 439445927.0,
|
||
|
|
"reward": 1.0263540983200072,
|
||
|
|
"reward_std": 0.12140857428312302,
|
||
|
|
"rewards/accuracy_reward": 0.6982638835906982,
|
||
|
|
"rewards/brier_reward": 0.8136134743690491,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9341794133186341,
|
||
|
|
"rewards/format_reward": 0.99140625,
|
||
|
|
"rewards/frontier_coverage_0": 0.032625177130103114,
|
||
|
|
"rewards/frontier_coverage_1": 0.032625177130103114,
|
||
|
|
"rewards/frontier_coverage_10": 0.031621862575411795,
|
||
|
|
"rewards/frontier_coverage_15": 0.029490308091044425,
|
||
|
|
"rewards/frontier_coverage_20": 0.0741084560751915,
|
||
|
|
"rewards/frontier_coverage_25": 0.23827401399612427,
|
||
|
|
"rewards/frontier_coverage_5": 0.032565965130925176,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14779730439186095,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.19302791357040405,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45555556416511533,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07389865219593048,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07389865219593048,
|
||
|
|
"signal/advantage_abs_mean": 0.08865651488304138,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.08865651488304138,
|
||
|
|
"signal/advantage_pre_scale_std": 0.16045403480529785,
|
||
|
|
"signal/advantage_std": 0.16045403480529785,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1274328500032425,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1681652307510376,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012743284739553929,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012743284739553929,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03525126874446869,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.057352755963802335,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035251271445304157,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035251271445304157,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.015196397714316846,
|
||
|
|
"signal/format_reward/group_std_mean": 0.033493170887231825,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007598198857158423,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007598198857158423,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.10695488750934601,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.15166882872581483,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015294548822566867,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0015294548822566867,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10695488750934601,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15166882872581483,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015294548822566867,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015294548822566867,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1000509575009346,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14240919947624206,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014307287288829683,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014307287288829683,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08026445358991623,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11479192227125168,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011477816849946975,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011477816849946975,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07273156195878983,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09654622375965119,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010400613187812268,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010400613187812268,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14482411444187165,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18901716768741608,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020709848264232277,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020709848264232277,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10635966509580612,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15090845227241517,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015209432225674392,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015209432225674392,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47999400007499904,
|
||
|
|
"eval_calibration/aurc": 0.15612408573344327,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.7397058913988753,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.8111514149268445,
|
||
|
|
"eval_calibration/confidence_entropy": 0.3610454566922106,
|
||
|
|
"eval_calibration/coverage@0%": 0.09895833333333333,
|
||
|
|
"eval_calibration/coverage@1%": 0.09895833333333333,
|
||
|
|
"eval_calibration/coverage@10%": 0.4114583333333333,
|
||
|
|
"eval_calibration/coverage@15%": 0.6510416666666666,
|
||
|
|
"eval_calibration/coverage@20%": 0.7864583333333334,
|
||
|
|
"eval_calibration/coverage@25%": 0.9427083333333334,
|
||
|
|
"eval_calibration/coverage@30%": 0.9895833333333334,
|
||
|
|
"eval_calibration/coverage@5%": 0.20833333333333334,
|
||
|
|
"eval_calibration/ece": 0.164064940040337,
|
||
|
|
"eval_calibration/mean_confidence": 0.7508913785324004,
|
||
|
|
"eval_completions/clipped_ratio": 0.006076388888888877,
|
||
|
|
"eval_completions/max_length": 2743.3333333333335,
|
||
|
|
"eval_completions/max_terminated_length": 2743.3333333333335,
|
||
|
|
"eval_completions/mean_length": 718.5382080078125,
|
||
|
|
"eval_completions/mean_terminated_length": 722.8815104166666,
|
||
|
|
"eval_completions/min_length": 98.83333333333333,
|
||
|
|
"eval_completions/min_terminated_length": 235.33333333333334,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 439445927.0,
|
||
|
|
"eval_reward": 1.012143760919571,
|
||
|
|
"eval_reward_std": 0.2549586296081543,
|
||
|
|
"eval_rewards/accuracy_reward": 0.6822916666666666,
|
||
|
|
"eval_rewards/brier_reward": 0.8020952641963959,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.884552131096522,
|
||
|
|
"eval_rewards/format_reward": 0.9921874900658926,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.03533175913617015,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.03533175913617015,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.035481404474315546,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.03456710961957773,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.06474988100429376,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.19553381452957788,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.035325445909014284,
|
||
|
|
"eval_runtime": 205.202,
|
||
|
|
"eval_samples_per_second": 4.873,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4191623230775197,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4642222821712494,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20958116153875986,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20958116153875986,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.22168858846028647,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22168858846028647,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.2531501104434331,
|
||
|
|
"eval_signal/advantage_std": 0.2531501104434331,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22948966672023138,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.29068108399709064,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022948966672023136,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022948966672023136,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.057275036349892616,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08676877121130626,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00572750383677582,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00572750383677582,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.015136718284338713,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.044194173999130726,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359142169356,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359142169356,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.1817739779750506,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.3032199541727702,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025993677166601024,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025993677166601024,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1817739779750506,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.3032199541727702,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025993677166601024,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025993677166601024,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.17377296338478723,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.2915558119614919,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002484953341384729,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002484953341384729,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.13976867124438286,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.2382419357697169,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019986919360235333,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019986919360235333,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.11765346676111221,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.16934698323408762,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016824445920065045,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016824445920065045,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.27798762917518616,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.33808498084545135,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00397522277974834,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00397522277974834,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.18090522040923437,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.30195263028144836,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025869448048373065,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025869448048373065,
|
||
|
|
"eval_steps_per_second": 0.029,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.20067151651571807,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7387570320570516,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8004499003116875,
|
||
|
|
"calibration/confidence_entropy": 0.3662937348410689,
|
||
|
|
"calibration/coverage@0%": 0.02244764397905759,
|
||
|
|
"calibration/coverage@1%": 0.02244764397905759,
|
||
|
|
"calibration/coverage@10%": 0.1557400741710297,
|
||
|
|
"calibration/coverage@15%": 0.376529777486911,
|
||
|
|
"calibration/coverage@20%": 0.47280793607054294,
|
||
|
|
"calibration/coverage@25%": 0.8266519702397355,
|
||
|
|
"calibration/coverage@30%": 0.9607329842931938,
|
||
|
|
"calibration/coverage@5%": 0.03338514397905759,
|
||
|
|
"calibration/ece": 0.13317514525521598,
|
||
|
|
"calibration/mean_confidence": 0.7690856718981816,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.008767361111111116,
|
||
|
|
"completions/max_length": 3502.2,
|
||
|
|
"completions/max_terminated_length": 3502.2,
|
||
|
|
"completions/mean_length": 717.9185913085937,
|
||
|
|
"completions/mean_terminated_length": 724.2613647460937,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 173.8,
|
||
|
|
"epoch": 0.491993850076874,
|
||
|
|
"grad_norm": 0.00037455017445608974,
|
||
|
|
"learning_rate": 9.036144578313253e-08,
|
||
|
|
"loss": -0.0073,
|
||
|
|
"num_tokens": 450782301.0,
|
||
|
|
"reward": 1.0498441219329835,
|
||
|
|
"reward_std": 0.11888199001550674,
|
||
|
|
"rewards/accuracy_reward": 0.7449652791023255,
|
||
|
|
"rewards/brier_reward": 0.8252813696861268,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9361446976661683,
|
||
|
|
"rewards/format_reward": 0.9912326455116272,
|
||
|
|
"rewards/frontier_coverage_0": 0.011065579298883677,
|
||
|
|
"rewards/frontier_coverage_1": 0.011065579298883677,
|
||
|
|
"rewards/frontier_coverage_10": 0.013013468869030476,
|
||
|
|
"rewards/frontier_coverage_15": 0.01967374011874199,
|
||
|
|
"rewards/frontier_coverage_20": 0.07497628033161163,
|
||
|
|
"rewards/frontier_coverage_25": 0.25076726377010344,
|
||
|
|
"rewards/frontier_coverage_5": 0.011220467463135719,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14809027910232545,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.20212058126926422,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.402777773141861,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07404513955116272,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07404513955116272,
|
||
|
|
"signal/advantage_abs_mean": 0.0827422708272934,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0827422708272934,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1549478828907013,
|
||
|
|
"signal/advantage_std": 0.1549478828907013,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12346100956201553,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16328605115413666,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012346101738512517,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012346101738512517,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0347956083714962,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05594836547970772,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034795609302818776,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034795609302818776,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.015869140625,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03326698914170265,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0079345703125,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0079345703125,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.12408159524202347,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.16999780237674714,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017743667354807257,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017743667354807257,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12408159524202347,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16999780237674714,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017743667354807257,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017743667354807257,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1159680426120758,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15904043912887572,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016583429649472236,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016583429649472236,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09267009794712067,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12770854830741882,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013251824537292122,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013251824537292122,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06762583926320076,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09054560959339142,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009670495055615902,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009670495055615902,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12514513731002808,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16600916385650635,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017895755358040334,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017895755358040334,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12371799796819687,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16952943801879883,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001769167324528098,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001769167324528098,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.09258411805079743,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7909286874200081,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.7829421573350821,
|
||
|
|
"calibration/confidence_entropy": 0.380171748559697,
|
||
|
|
"calibration/coverage@0%": 0.07280701754385965,
|
||
|
|
"calibration/coverage@1%": 0.10964912280701755,
|
||
|
|
"calibration/coverage@10%": 0.6285096107805916,
|
||
|
|
"calibration/coverage@15%": 0.7868216911868257,
|
||
|
|
"calibration/coverage@20%": 0.9004364963028112,
|
||
|
|
"calibration/coverage@25%": 0.9665205474352353,
|
||
|
|
"calibration/coverage@30%": 1.0,
|
||
|
|
"calibration/coverage@5%": 0.4454574210372016,
|
||
|
|
"calibration/ece": 0.06343821279388381,
|
||
|
|
"calibration/mean_confidence": 0.7582262227842415,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00853587962962965,
|
||
|
|
"completions/max_length": 2820.3333333333335,
|
||
|
|
"completions/max_terminated_length": 2820.3333333333335,
|
||
|
|
"completions/mean_length": 715.6140340169271,
|
||
|
|
"completions/mean_terminated_length": 721.9536946614584,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 167.33333333333334,
|
||
|
|
"epoch": 0.49919376007799904,
|
||
|
|
"num_tokens": 457594673.0,
|
||
|
|
"reward": 1.0277764797210693,
|
||
|
|
"reward_std": 0.12234559903542201,
|
||
|
|
"rewards/accuracy_reward": 0.7025462985038757,
|
||
|
|
"rewards/brier_reward": 0.8049926360448202,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.932228704293569,
|
||
|
|
"rewards/format_reward": 0.9914641181627909,
|
||
|
|
"rewards/frontier_coverage_0": 0.023547140260537464,
|
||
|
|
"rewards/frontier_coverage_1": 0.023547140260537464,
|
||
|
|
"rewards/frontier_coverage_10": 0.023169512239595253,
|
||
|
|
"rewards/frontier_coverage_15": 0.028680586876968544,
|
||
|
|
"rewards/frontier_coverage_20": 0.09770172089338303,
|
||
|
|
"rewards/frontier_coverage_25": 0.2726644178231557,
|
||
|
|
"rewards/frontier_coverage_5": 0.02363560472925504,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15646701554457346,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.20409129559993744,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4305555721124013,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07823350777228673,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07823350777228673,
|
||
|
|
"signal/advantage_abs_mean": 0.0893336609005928,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0893336609005928,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1595475971698761,
|
||
|
|
"signal/advantage_std": 0.1595475971698761,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13335002462069193,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.177884042263031,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013335002275804678,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013335002275804678,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03668076234559218,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05635303258895874,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036680761259049177,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036680761259049177,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0151457612713178,
|
||
|
|
"signal/format_reward/group_std_mean": 0.030251561353604,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8703703681627909,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0075728806356589,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0075728806356589,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.12804659952720007,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.174424409866333,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018310664454475045,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018310664454475045,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12804659952720007,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.174424409866333,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018310664454475045,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018310664454475045,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11198657502730687,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15388049681981406,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016014080417032044,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016014080417032044,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08891634891430537,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12195203453302383,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012715038610622287,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012715038610622287,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07647461940844853,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10173061241706212,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010935871008162696,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010935871008162696,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1526160587867101,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2031193325916926,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002182409632951021,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002182409632951021,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12730942914883295,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17352166771888733,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001820524805225432,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001820524805225432,
|
||
|
|
"step": 208,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": -0.00772521308625493,
|
||
|
|
"train_runtime": 40973.9456,
|
||
|
|
"train_samples_per_second": 0.366,
|
||
|
|
"train_steps_per_second": 0.005
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 208,
|
||
|
|
"num_input_tokens_seen": 457594673,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 60,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 6,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|