Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-noece-noaurc-scaletrue-cold-math Source: Original Platform
5721 lines
358 KiB
JSON
5721 lines
358 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.4883207070890415,
|
|
"calibration/batch_distribution_entropy": 0.2739739421553503,
|
|
"calibration/confidence_entropy": 0.21793248029268142,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4604362091577833,
|
|
"calibration/mean_confidence": 0.9143221468537565,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018576388888888906,
|
|
"completions/max_length": 3895.6,
|
|
"completions/max_terminated_length": 3895.6,
|
|
"completions/mean_length": 514.4408813476563,
|
|
"completions/mean_terminated_length": 524.181884765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.0081259123980999,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.0056,
|
|
"num_tokens": 9040567.0,
|
|
"reward": 0.4569155514240265,
|
|
"reward_std": 0.41827074289321897,
|
|
"rewards/accuracy_reward": 0.2575520783662796,
|
|
"rewards/brier_reward": 0.30908964276313783,
|
|
"rewards/confidence_uniqueness_reward": 0.28769826889038086,
|
|
"rewards/format_reward": 0.5966145753860473,
|
|
"rewards/frontier_coverage_0": 0.27184249460697174,
|
|
"rewards/frontier_coverage_1": 0.27184249460697174,
|
|
"rewards/frontier_coverage_10": 0.27184249460697174,
|
|
"rewards/frontier_coverage_15": 0.27184249460697174,
|
|
"rewards/frontier_coverage_20": 0.27184249460697174,
|
|
"rewards/frontier_coverage_25": 0.27184249460697174,
|
|
"rewards/frontier_coverage_5": 0.27184249460697174,
|
|
"rewards/frontier_entropy_batch_reward": -0.5705800533294678,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.30725369453430174,
|
|
"signal/accuracy_reward/group_std_mean": 0.3699012637138367,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.07500000149011612,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3921299993991852,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15362684726715087,
|
|
"signal/advantage_abs_mean": 0.848686158657074,
|
|
"signal/advantage_pre_scale_abs_mean": 0.35823245644569396,
|
|
"signal/advantage_pre_scale_std": 0.42261629104614257,
|
|
"signal/advantage_std": 0.9842132687568664,
|
|
"signal/brier_reward/centered_abs_mean": 0.3175659000873566,
|
|
"signal/brier_reward/group_std_mean": 0.37283719182014463,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08106742650270463,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03175659067928791,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23240519165992737,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2853622674942017,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05935205966234207,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02324051931500435,
|
|
"signal/format_reward/centered_abs_mean": 0.44161783456802367,
|
|
"signal/format_reward/group_std_mean": 0.4756269872188568,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5638803482055664,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.22080891728401184,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3085132300853729,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.36870989203453064,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011261269636452197,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004411739017814398,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3085132300853729,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.36870989203453064,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011261269636452197,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004411739017814398,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3085132300853729,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36870989203453064,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011261269636452197,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004411739017814398,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3085132300853729,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36870989203453064,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011261269636452197,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004411739017814398,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3085132300853729,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36870989203453064,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011261269636452197,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004411739017814398,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3085132300853729,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36870989203453064,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.011261269636452197,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004411739017814398,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3085132300853729,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.36870989203453064,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.011261269636452197,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004411739017814398,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4511567711830139,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.48259199857711793,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.1152160570025444,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.045115678757429126,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5107523789601409,
|
|
"calibration/batch_distribution_entropy": 0.24915467457321486,
|
|
"calibration/confidence_entropy": 0.21520335761112702,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4658758918099065,
|
|
"calibration/mean_confidence": 0.922980520374389,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01883680555555558,
|
|
"completions/max_length": 3971.0,
|
|
"completions/max_terminated_length": 3971.0,
|
|
"completions/mean_length": 469.39539794921876,
|
|
"completions/mean_terminated_length": 478.6153076171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 22.4,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.05667172372341156,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 0.0066,
|
|
"num_tokens": 17530722.0,
|
|
"reward": 0.5542843520641327,
|
|
"reward_std": 0.3886994063854218,
|
|
"rewards/accuracy_reward": 0.301215273141861,
|
|
"rewards/brier_reward": 0.3664989948272705,
|
|
"rewards/confidence_uniqueness_reward": 0.3651871979236603,
|
|
"rewards/format_reward": 0.7378472208976745,
|
|
"rewards/frontier_coverage_0": 0.3178509533405304,
|
|
"rewards/frontier_coverage_1": 0.3178509533405304,
|
|
"rewards/frontier_coverage_10": 0.3178509533405304,
|
|
"rewards/frontier_coverage_15": 0.3178509533405304,
|
|
"rewards/frontier_coverage_20": 0.3178509533405304,
|
|
"rewards/frontier_coverage_25": 0.3178509533405304,
|
|
"rewards/frontier_coverage_5": 0.3178509533405304,
|
|
"rewards/frontier_entropy_batch_reward": -0.7023240089416504,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3270073771476746,
|
|
"signal/accuracy_reward/group_std_mean": 0.38548147678375244,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.06666666865348816,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.4557223439216614,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1635036885738373,
|
|
"signal/advantage_abs_mean": 0.8083321452140808,
|
|
"signal/advantage_pre_scale_abs_mean": 0.32132325768470765,
|
|
"signal/advantage_pre_scale_std": 0.392959201335907,
|
|
"signal/advantage_std": 0.984190571308136,
|
|
"signal/brier_reward/centered_abs_mean": 0.32042253017425537,
|
|
"signal/brier_reward/group_std_mean": 0.3735620677471161,
|
|
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08921760171651841,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.032042254135012625,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2168998122215271,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2739565551280975,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060243000835180284,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02168998159468174,
|
|
"signal/format_reward/centered_abs_mean": 0.3335828959941864,
|
|
"signal/format_reward/group_std_mean": 0.40515110492706297,
|
|
"signal/format_reward/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.45944651365280154,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1667914479970932,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3215973138809204,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.37801494002342223,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01281338632106781,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0045988415367901325,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3215973138809204,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.37801494002342223,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01281338632106781,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0045988415367901325,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3215973138809204,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.37801494002342223,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01281338632106781,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0045988415367901325,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3215973138809204,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.37801494002342223,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01281338632106781,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0045988415367901325,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3215973138809204,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.37801494002342223,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01281338632106781,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045988415367901325,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3215973138809204,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.37801494002342223,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01281338632106781,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0045988415367901325,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3215973138809204,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.37801494002342223,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01281338632106781,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0045988415367901325,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36690880060195924,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4326904654502869,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10138487070798874,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03669087961316109,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5439928649912942,
|
|
"calibration/batch_distribution_entropy": 0.3275892998380167,
|
|
"calibration/confidence_entropy": 0.26077720370647295,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4911188693830021,
|
|
"calibration/mean_confidence": 0.9035915660059434,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009895833333333326,
|
|
"completions/max_length": 3997.8,
|
|
"completions/max_terminated_length": 3997.8,
|
|
"completions/mean_length": 426.6123352050781,
|
|
"completions/mean_terminated_length": 430.9161010742188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 32.4,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.017714520916342735,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0218,
|
|
"num_tokens": 25547280.0,
|
|
"reward": 0.6818203568458557,
|
|
"reward_std": 0.2930204153060913,
|
|
"rewards/accuracy_reward": 0.3302083373069763,
|
|
"rewards/brier_reward": 0.44146730899810793,
|
|
"rewards/confidence_uniqueness_reward": 0.5243119597434998,
|
|
"rewards/format_reward": 0.9497395992279053,
|
|
"rewards/frontier_coverage_0": 0.3611275374889374,
|
|
"rewards/frontier_coverage_1": 0.3611275374889374,
|
|
"rewards/frontier_coverage_10": 0.3611275374889374,
|
|
"rewards/frontier_coverage_15": 0.3611275374889374,
|
|
"rewards/frontier_coverage_20": 0.3611275374889374,
|
|
"rewards/frontier_coverage_25": 0.3611275374889374,
|
|
"rewards/frontier_coverage_5": 0.3611275374889374,
|
|
"rewards/frontier_entropy_batch_reward": -0.9088038682937623,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.31500651240348815,
|
|
"signal/accuracy_reward/group_std_mean": 0.37460089921951295,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.09166666865348816,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6708433270454407,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15750325620174407,
|
|
"signal/advantage_abs_mean": 0.769465982913971,
|
|
"signal/advantage_pre_scale_abs_mean": 0.23625607192516326,
|
|
"signal/advantage_pre_scale_std": 0.30175902843475344,
|
|
"signal/advantage_std": 0.9840420842170715,
|
|
"signal/brier_reward/centered_abs_mean": 0.2925845801830292,
|
|
"signal/brier_reward/group_std_mean": 0.3448193073272705,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12465540021657943,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02925845831632614,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19031396508216858,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23621676564216615,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0817145824432373,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01903139688074589,
|
|
"signal/format_reward/centered_abs_mean": 0.08739691749215125,
|
|
"signal/format_reward/group_std_mean": 0.15843217223882675,
|
|
"signal/format_reward/group_zero_std_frac": 0.3861111253499985,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17899880260229112,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.04369845874607563,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.30582007169723513,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3615089595317841,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.018638250604271888,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004373226827010512,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.30582007169723513,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3615089595317841,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.018638250604271888,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004373226827010512,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.30582007169723513,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3615089595317841,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018638250604271888,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004373226827010512,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.30582007169723513,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3615089595317841,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018638250604271888,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004373226827010512,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.30582007169723513,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3615089595317841,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018638250604271888,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004373226827010512,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.30582007169723513,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3615089595317841,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018638250604271888,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004373226827010512,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.30582007169723513,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3615089595317841,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.018638250604271888,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004373226827010512,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15383070558309556,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25853142738342283,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1277777798473835,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06458796337246894,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015383070893585682,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4556803994965267,
|
|
"calibration/batch_distribution_entropy": 0.5350678484304228,
|
|
"calibration/buffer_distribution_entropy": 0.3417969188964092,
|
|
"calibration/confidence_entropy": 0.3897717372305213,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0433420365535248,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3313420910147438,
|
|
"calibration/mean_confidence": 0.8364994562189102,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010156249999999978,
|
|
"completions/max_length": 3824.2,
|
|
"completions/max_terminated_length": 3824.2,
|
|
"completions/mean_length": 465.0840270996094,
|
|
"completions/mean_terminated_length": 469.90997924804685,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 95.0,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.014085509814321995,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.0318,
|
|
"num_tokens": 34018744.0,
|
|
"reward": 0.7619543671607971,
|
|
"reward_std": 0.23002811074256896,
|
|
"rewards/accuracy_reward": 0.43802083730697633,
|
|
"rewards/brier_reward": 0.5893322110176087,
|
|
"rewards/confidence_uniqueness_reward": 0.653409230709076,
|
|
"rewards/format_reward": 0.985850703716278,
|
|
"rewards/frontier_coverage_0": 0.19372253511101006,
|
|
"rewards/frontier_coverage_1": 0.19372253511101006,
|
|
"rewards/frontier_coverage_10": 0.19372253511101006,
|
|
"rewards/frontier_coverage_15": 0.19372253511101006,
|
|
"rewards/frontier_coverage_20": 0.19372253511101006,
|
|
"rewards/frontier_coverage_25": 0.19372253511101006,
|
|
"rewards/frontier_coverage_5": 0.19372253511101006,
|
|
"rewards/frontier_entropy_batch_reward": -0.9364717721939086,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2878797709941864,
|
|
"signal/accuracy_reward/group_std_mean": 0.35460472106933594,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.09444444626569748,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9924409985542297,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1439398854970932,
|
|
"signal/advantage_abs_mean": 0.7534375190734863,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1824011266231537,
|
|
"signal/advantage_pre_scale_std": 0.23983034789562224,
|
|
"signal/advantage_std": 0.9837870955467224,
|
|
"signal/brier_reward/centered_abs_mean": 0.23643364608287812,
|
|
"signal/brier_reward/group_std_mean": 0.2890482544898987,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1609581083059311,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023643364757299425,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15150568187236785,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.18613037019968032,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09755237996578217,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015150568448007107,
|
|
"signal/format_reward/centered_abs_mean": 0.02616644911468029,
|
|
"signal/format_reward/group_std_mean": 0.058222611993551256,
|
|
"signal/format_reward/group_zero_std_frac": 0.7305555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08630450516939163,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013083224557340145,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14219243675470353,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18434092849493028,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012311214115470648,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002033351955469698,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14219243675470353,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18434092849493028,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012311214115470648,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002033351955469698,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14219243675470353,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18434092849493028,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012311214115470648,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002033351955469698,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14219243675470353,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18434092849493028,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012311214115470648,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002033351955469698,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14219243675470353,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18434092849493028,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012311214115470648,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002033351955469698,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14219243675470353,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18434092849493028,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012311214115470648,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002033351955469698,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14219243675470353,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18434092849493028,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012311214115470648,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002033351955469698,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11082728952169418,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.21301989257335663,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.25833334028720856,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.07670077979564667,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011082728952169418,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32297898002690983,
|
|
"calibration/batch_distribution_entropy": 0.6393962661945383,
|
|
"calibration/buffer_distribution_entropy": 0.475857881253323,
|
|
"calibration/confidence_entropy": 0.5880384168326668,
|
|
"calibration/coverage@0%": 0.00737848722179194,
|
|
"calibration/coverage@1%": 0.00737848722179194,
|
|
"calibration/coverage@10%": 0.021514612876242203,
|
|
"calibration/coverage@15%": 0.053983590903203496,
|
|
"calibration/coverage@20%": 0.21906205622592817,
|
|
"calibration/coverage@25%": 0.3800217791924946,
|
|
"calibration/coverage@30%": 0.4740219432344893,
|
|
"calibration/coverage@5%": 0.00737848722179194,
|
|
"calibration/ece": 0.13618263020369642,
|
|
"calibration/mean_confidence": 0.6823366961993423,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0109375,
|
|
"completions/max_length": 3916.0,
|
|
"completions/max_terminated_length": 3916.0,
|
|
"completions/mean_length": 551.5579833984375,
|
|
"completions/mean_terminated_length": 557.6826171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 100.2,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.04915325343608856,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.0291,
|
|
"num_tokens": 43497140.0,
|
|
"reward": 0.8319814205169678,
|
|
"reward_std": 0.1857275605201721,
|
|
"rewards/accuracy_reward": 0.5822916686534881,
|
|
"rewards/brier_reward": 0.7326545953750611,
|
|
"rewards/confidence_uniqueness_reward": 0.6608519792556763,
|
|
"rewards/format_reward": 0.9865451455116272,
|
|
"rewards/frontier_coverage_0": -0.014849835354834796,
|
|
"rewards/frontier_coverage_1": -0.014849835354834796,
|
|
"rewards/frontier_coverage_10": -0.014849835354834796,
|
|
"rewards/frontier_coverage_15": -0.014849835354834796,
|
|
"rewards/frontier_coverage_20": -0.014849835354834796,
|
|
"rewards/frontier_coverage_25": -0.014849835354834796,
|
|
"rewards/frontier_coverage_5": -0.014849835354834796,
|
|
"rewards/frontier_entropy_batch_reward": -0.9030117869377137,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.24696180522441863,
|
|
"signal/accuracy_reward/group_std_mean": 0.3145732879638672,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.15555555671453475,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9943392634391784,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12348090261220931,
|
|
"signal/advantage_abs_mean": 0.7143722534179687,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14123885333538055,
|
|
"signal/advantage_pre_scale_std": 0.19914124310016632,
|
|
"signal/advantage_std": 0.9836650371551514,
|
|
"signal/brier_reward/centered_abs_mean": 0.1310290887951851,
|
|
"signal/brier_reward/group_std_mean": 0.1702386736869812,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10493465960025787,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013102908991277218,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1781868025660515,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.21007861495018004,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.14442408829927444,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01781868040561676,
|
|
"signal/format_reward/centered_abs_mean": 0.02363823801279068,
|
|
"signal/format_reward/group_std_mean": 0.04480181857943535,
|
|
"signal/format_reward/group_zero_std_frac": 0.8166666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09280302375555038,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01181911900639534,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.08157578110694885,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.10721786618232727,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.009440916776657104,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011665337020531296,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08157578110694885,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.10721786618232727,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.009440916776657104,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011665337020531296,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08157578110694885,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.10721786618232727,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.009440916776657104,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011665337020531296,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08157578110694885,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10721786618232727,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.009440916776657104,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011665337020531296,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08157578110694885,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10721786618232727,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009440916776657104,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011665337020531296,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08157578110694885,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10721786618232727,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.009440916776657104,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011665337020531296,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08157578110694885,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.10721786618232727,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.009440916776657104,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011665337020531296,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16028570830821992,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.27135405838489535,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.15833333898335694,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12926736772060393,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016028571128845214,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28283994378691124,
|
|
"calibration/batch_distribution_entropy": 0.8098021020102317,
|
|
"calibration/buffer_distribution_entropy": 0.6103832596029344,
|
|
"calibration/confidence_entropy": 0.5821678574801844,
|
|
"calibration/coverage@0%": 0.006513023807924029,
|
|
"calibration/coverage@1%": 0.006513023807924029,
|
|
"calibration/coverage@10%": 0.023857197249658447,
|
|
"calibration/coverage@15%": 0.03540575368010464,
|
|
"calibration/coverage@20%": 0.18533627087873336,
|
|
"calibration/coverage@25%": 0.4079531033741429,
|
|
"calibration/coverage@30%": 0.5827169258003846,
|
|
"calibration/coverage@5%": 0.01681112678895384,
|
|
"calibration/ece": 0.1065982830413967,
|
|
"calibration/mean_confidence": 0.6392839596712477,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015885416666666673,
|
|
"completions/max_length": 3756.2,
|
|
"completions/max_terminated_length": 3756.2,
|
|
"completions/mean_length": 629.6125,
|
|
"completions/mean_terminated_length": 639.7435302734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.6,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.0025920316111296415,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.0323,
|
|
"num_tokens": 53860196.0,
|
|
"reward": 0.8962351679801941,
|
|
"reward_std": 0.18339000940322875,
|
|
"rewards/accuracy_reward": 0.6081597208976746,
|
|
"rewards/brier_reward": 0.7350787162780762,
|
|
"rewards/confidence_uniqueness_reward": 0.8857907652854919,
|
|
"rewards/format_reward": 0.9823784708976746,
|
|
"rewards/frontier_coverage_0": -0.024395102635025978,
|
|
"rewards/frontier_coverage_1": -0.024395102635025978,
|
|
"rewards/frontier_coverage_10": -0.024395102635025978,
|
|
"rewards/frontier_coverage_15": -0.024395102635025978,
|
|
"rewards/frontier_coverage_20": -0.024395102635025978,
|
|
"rewards/frontier_coverage_25": -0.024395102635025978,
|
|
"rewards/frontier_coverage_5": -0.024395102635025978,
|
|
"rewards/frontier_entropy_batch_reward": -0.5867892920970916,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2335611939430237,
|
|
"signal/accuracy_reward/group_std_mean": 0.29245399236679076,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.23055555522441865,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.932023000717163,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11678059697151184,
|
|
"signal/advantage_abs_mean": 0.7472962260246276,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1410281091928482,
|
|
"signal/advantage_pre_scale_std": 0.2018715351819992,
|
|
"signal/advantage_std": 0.9836687922477723,
|
|
"signal/brier_reward/centered_abs_mean": 0.1558063119649887,
|
|
"signal/brier_reward/group_std_mean": 0.19716133773326874,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12757135629653932,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015580631978809833,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07879094183444976,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10910149812698364,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060947873443365094,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00787909417413175,
|
|
"signal/format_reward/centered_abs_mean": 0.03021375872194767,
|
|
"signal/format_reward/group_std_mean": 0.056160366535186766,
|
|
"signal/format_reward/group_zero_std_frac": 0.7750000119209289,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12186049222946167,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015106879360973834,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.12939045429229737,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17340194284915925,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015195189043879509,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018502835649996997,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12939045429229737,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17340194284915925,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015195189043879509,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018502835649996997,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12939045429229737,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17340194284915925,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015195189043879509,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018502835649996997,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12939045429229737,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17340194284915925,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015195189043879509,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018502835649996997,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12939045429229737,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17340194284915925,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015195189043879509,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018502835649996997,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12939045429229737,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17340194284915925,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015195189043879509,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018502835649996997,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12939045429229737,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17340194284915925,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015195189043879509,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018502835649996997,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4059493899345398,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4744054675102234,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.32940598130226134,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04059493914246559,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26349164441341744,
|
|
"calibration/batch_distribution_entropy": 0.9722011088416853,
|
|
"calibration/buffer_distribution_entropy": 0.7147490484587777,
|
|
"calibration/confidence_entropy": 0.4716542473597789,
|
|
"calibration/coverage@0%": 0.010033409527924178,
|
|
"calibration/coverage@1%": 0.010033409527924178,
|
|
"calibration/coverage@10%": 0.04262926925598449,
|
|
"calibration/coverage@15%": 0.07320457015039024,
|
|
"calibration/coverage@20%": 0.11014567619634501,
|
|
"calibration/coverage@25%": 0.57302513015744,
|
|
"calibration/coverage@30%": 0.8417191254787083,
|
|
"calibration/coverage@5%": 0.010033409527924178,
|
|
"calibration/ece": 0.2408688157230487,
|
|
"calibration/mean_confidence": 0.5653689750040694,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02152777777777779,
|
|
"completions/max_length": 3726.4,
|
|
"completions/max_terminated_length": 3726.4,
|
|
"completions/mean_length": 640.2627685546875,
|
|
"completions/mean_terminated_length": 654.4486206054687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 180.8,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.00288687227293849,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0519,
|
|
"num_tokens": 64313463.0,
|
|
"reward": 0.9324461936950683,
|
|
"reward_std": 0.1782814681529999,
|
|
"rewards/accuracy_reward": 0.6236111164093018,
|
|
"rewards/brier_reward": 0.6862263441085815,
|
|
"rewards/confidence_uniqueness_reward": 0.9285731554031372,
|
|
"rewards/format_reward": 0.9761284589767456,
|
|
"rewards/frontier_coverage_0": -0.047293629869818686,
|
|
"rewards/frontier_coverage_1": -0.047293629869818686,
|
|
"rewards/frontier_coverage_10": -0.047293629869818686,
|
|
"rewards/frontier_coverage_15": -0.047293629869818686,
|
|
"rewards/frontier_coverage_20": -0.047293629869818686,
|
|
"rewards/frontier_coverage_25": -0.047293629869818686,
|
|
"rewards/frontier_coverage_5": -0.047293629869818686,
|
|
"rewards/frontier_entropy_batch_reward": -0.2416945517063141,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2194552928209305,
|
|
"signal/accuracy_reward/group_std_mean": 0.28137104511260985,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.23055555522441865,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8508251547813416,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10972764641046524,
|
|
"signal/advantage_abs_mean": 0.747930896282196,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13469484448432922,
|
|
"signal/advantage_pre_scale_std": 0.19468034505844117,
|
|
"signal/advantage_std": 0.983700430393219,
|
|
"signal/brier_reward/centered_abs_mean": 0.2539998531341553,
|
|
"signal/brier_reward/group_std_mean": 0.30160382986068723,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19768215715885162,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.025399985909461974,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047507094591856,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08019827008247375,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03670351468026638,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004750709515064955,
|
|
"signal/format_reward/centered_abs_mean": 0.0376356340944767,
|
|
"signal/format_reward/group_std_mean": 0.06864920854568482,
|
|
"signal/format_reward/group_zero_std_frac": 0.7277777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.14402690380811692,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01881781704723835,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2695829331874847,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3519932210445404,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029944488778710365,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038550359196960924,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2695829331874847,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3519932210445404,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029944488778710365,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038550359196960924,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2695829331874847,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3519932210445404,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029944488778710365,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038550359196960924,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2695829331874847,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3519932210445404,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029944488778710365,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038550359196960924,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2695829331874847,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3519932210445404,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029944488778710365,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038550359196960924,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2695829331874847,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3519932210445404,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029944488778710365,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038550359196960924,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2695829331874847,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3519932210445404,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029944488778710365,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038550359196960924,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3358907103538513,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40937405824661255,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.26255030035972593,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033589070290327074,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2812089130282969,
|
|
"calibration/batch_distribution_entropy": 0.9378820973722213,
|
|
"calibration/buffer_distribution_entropy": 0.7788346026193121,
|
|
"calibration/confidence_entropy": 0.5052747431569691,
|
|
"calibration/coverage@0%": 0.011617889637609121,
|
|
"calibration/coverage@1%": 0.011617889637609121,
|
|
"calibration/coverage@10%": 0.02006814162348116,
|
|
"calibration/coverage@15%": 0.04699503081949964,
|
|
"calibration/coverage@20%": 0.21852156777167192,
|
|
"calibration/coverage@25%": 0.3002575847416842,
|
|
"calibration/coverage@30%": 0.6493828972901323,
|
|
"calibration/coverage@5%": 0.011617889637609121,
|
|
"calibration/ece": 0.18469349731283605,
|
|
"calibration/mean_confidence": 0.6265628071196891,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017881944444444443,
|
|
"completions/max_length": 3601.8,
|
|
"completions/max_terminated_length": 3601.8,
|
|
"completions/mean_length": 686.0500854492187,
|
|
"completions/mean_terminated_length": 698.4910034179687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 203.2,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.0052315001375973225,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0494,
|
|
"num_tokens": 75336280.0,
|
|
"reward": 0.9460026144981384,
|
|
"reward_std": 0.16873830258846284,
|
|
"rewards/accuracy_reward": 0.6516493201255799,
|
|
"rewards/brier_reward": 0.7267241477966309,
|
|
"rewards/confidence_uniqueness_reward": 0.9280878067016601,
|
|
"rewards/format_reward": 0.9811632037162781,
|
|
"rewards/frontier_coverage_0": -0.03641742318868637,
|
|
"rewards/frontier_coverage_1": -0.03641742318868637,
|
|
"rewards/frontier_coverage_10": -0.03641742318868637,
|
|
"rewards/frontier_coverage_15": -0.03641742318868637,
|
|
"rewards/frontier_coverage_20": -0.03641742318868637,
|
|
"rewards/frontier_coverage_25": -0.03641742318868637,
|
|
"rewards/frontier_coverage_5": -0.03641742318868637,
|
|
"rewards/frontier_entropy_batch_reward": -0.3223945081233978,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19728189706802368,
|
|
"signal/accuracy_reward/group_std_mean": 0.257595032453537,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2861111104488373,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9428304195404053,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09864094853401184,
|
|
"signal/advantage_abs_mean": 0.7440566301345826,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1264283075928688,
|
|
"signal/advantage_pre_scale_std": 0.19226027727127076,
|
|
"signal/advantage_std": 0.9835219383239746,
|
|
"signal/brier_reward/centered_abs_mean": 0.2110671579837799,
|
|
"signal/brier_reward/group_std_mean": 0.2574485570192337,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20208889842033387,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021106715872883798,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04523418918251991,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07450791597366332,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.043287652730941775,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045234191231429575,
|
|
"signal/format_reward/centered_abs_mean": 0.03200412429869175,
|
|
"signal/format_reward/group_std_mean": 0.05935907438397407,
|
|
"signal/format_reward/group_zero_std_frac": 0.7611111164093017,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.15341382324695588,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016002062149345873,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1926664799451828,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26043030619621277,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02639569416642189,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002755130687728524,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1926664799451828,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26043030619621277,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02639569416642189,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002755130687728524,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1926664799451828,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26043030619621277,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02639569416642189,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002755130687728524,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1926664799451828,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26043030619621277,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02639569416642189,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002755130687728524,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1926664799451828,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26043030619621277,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02639569416642189,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002755130687728524,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1926664799451828,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26043030619621277,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02639569416642189,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002755130687728524,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1926664799451828,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26043030619621277,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02639569416642189,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002755130687728524,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38592681884765623,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4502368450164795,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.36914966702461244,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038592683523893355,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18182826496863658,
|
|
"calibration/batch_distribution_entropy": 0.9293780132057007,
|
|
"calibration/buffer_distribution_entropy": 0.8135788097220038,
|
|
"calibration/confidence_entropy": 0.4995505573094066,
|
|
"calibration/coverage@0%": 0.014873545187723889,
|
|
"calibration/coverage@1%": 0.014873545187723889,
|
|
"calibration/coverage@10%": 0.13937415970478578,
|
|
"calibration/coverage@15%": 0.3561157115232597,
|
|
"calibration/coverage@20%": 0.6580942309690091,
|
|
"calibration/coverage@25%": 0.9673320588192078,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.05969262413509231,
|
|
"calibration/ece": 0.16737471673810952,
|
|
"calibration/mean_confidence": 0.626975023284362,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013888888888888905,
|
|
"completions/max_length": 3784.0,
|
|
"completions/max_terminated_length": 3784.0,
|
|
"completions/mean_length": 739.9791625976562,
|
|
"completions/mean_terminated_length": 750.4744018554687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 252.6,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.002621802967041731,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.0319,
|
|
"num_tokens": 86996104.0,
|
|
"reward": 0.9631305456161499,
|
|
"reward_std": 0.1516349971294403,
|
|
"rewards/accuracy_reward": 0.66796875,
|
|
"rewards/brier_reward": 0.762067437171936,
|
|
"rewards/confidence_uniqueness_reward": 0.9327586412429809,
|
|
"rewards/format_reward": 0.98515625,
|
|
"rewards/frontier_coverage_0": -0.008925668522715568,
|
|
"rewards/frontier_coverage_1": -0.008925668522715568,
|
|
"rewards/frontier_coverage_10": -0.008925668522715568,
|
|
"rewards/frontier_coverage_15": -0.008925668522715568,
|
|
"rewards/frontier_coverage_20": -0.008925668522715568,
|
|
"rewards/frontier_coverage_25": -0.008925668522715568,
|
|
"rewards/frontier_coverage_5": -0.008925668522715568,
|
|
"rewards/frontier_entropy_batch_reward": -0.3202110558748245,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18296983540058137,
|
|
"signal/accuracy_reward/group_std_mean": 0.24478627741336823,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.29722222983837127,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9449079632759094,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09148491770029069,
|
|
"signal/advantage_abs_mean": 0.7456163048744202,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11277087926864623,
|
|
"signal/advantage_pre_scale_std": 0.1716623306274414,
|
|
"signal/advantage_std": 0.9834415912628174,
|
|
"signal/brier_reward/centered_abs_mean": 0.19162435531616212,
|
|
"signal/brier_reward/group_std_mean": 0.23700920343399048,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19831233322620392,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019162436202168464,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037688417732715605,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06137025505304337,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03864099867641926,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037688419222831728,
|
|
"signal/format_reward/centered_abs_mean": 0.02428927905857563,
|
|
"signal/format_reward/group_std_mean": 0.04575216062366962,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1223247617483139,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012144639529287815,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20233065783977508,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2709280252456665,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029919801652431487,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028933283407241105,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20233065783977508,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2709280252456665,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029919801652431487,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028933283407241105,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20233065783977508,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2709280252456665,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029919801652431487,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028933283407241105,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20233065783977508,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2709280252456665,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029919801652431487,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028933283407241105,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20233065783977508,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2709280252456665,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029919801652431487,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028933283407241105,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20233065783977508,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2709280252456665,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029919801652431487,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028933283407241105,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20233065783977508,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2709280252456665,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029919801652431487,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028933283407241105,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37160670161247256,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43908803462982177,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38857935070991517,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03716067224740982,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.355543148661393,
|
|
"calibration/batch_distribution_entropy": 0.9560598149241851,
|
|
"calibration/buffer_distribution_entropy": 0.8409339110002637,
|
|
"calibration/confidence_entropy": 0.46758599740547824,
|
|
"calibration/coverage@0%": 0.0110337334407132,
|
|
"calibration/coverage@1%": 0.0110337334407132,
|
|
"calibration/coverage@10%": 0.011561437926201327,
|
|
"calibration/coverage@15%": 0.03893921379876839,
|
|
"calibration/coverage@20%": 0.09381513646509912,
|
|
"calibration/coverage@25%": 0.2094168255802026,
|
|
"calibration/coverage@30%": 0.4347323189967276,
|
|
"calibration/coverage@5%": 0.0110337334407132,
|
|
"calibration/ece": 0.1621294484579252,
|
|
"calibration/mean_confidence": 0.5582715356184847,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009895833333333348,
|
|
"completions/max_length": 3273.0,
|
|
"completions/max_terminated_length": 3273.0,
|
|
"completions/mean_length": 710.214501953125,
|
|
"completions/mean_terminated_length": 717.2854614257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 228.6,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.003402084344998002,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.0296,
|
|
"num_tokens": 98275375.0,
|
|
"reward": 0.9638577103614807,
|
|
"reward_std": 0.14062503576278687,
|
|
"rewards/accuracy_reward": 0.6588541626930237,
|
|
"rewards/brier_reward": 0.7617009282112122,
|
|
"rewards/confidence_uniqueness_reward": 0.9369927644729614,
|
|
"rewards/format_reward": 0.9899305582046509,
|
|
"rewards/frontier_coverage_0": 0.007022621482610703,
|
|
"rewards/frontier_coverage_1": 0.007022621482610703,
|
|
"rewards/frontier_coverage_10": 0.007022621482610703,
|
|
"rewards/frontier_coverage_15": 0.007022621482610703,
|
|
"rewards/frontier_coverage_20": 0.007022621482610703,
|
|
"rewards/frontier_coverage_25": 0.007022621482610703,
|
|
"rewards/frontier_coverage_5": 0.007022621482610703,
|
|
"rewards/frontier_entropy_batch_reward": -0.3110700786113739,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1725911468267441,
|
|
"signal/accuracy_reward/group_std_mean": 0.22528342604637147,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3611111223697662,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9741186976432801,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08629557341337205,
|
|
"signal/advantage_abs_mean": 0.7553925156593323,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10563597530126571,
|
|
"signal/advantage_pre_scale_std": 0.16184936761856078,
|
|
"signal/advantage_std": 0.9833512544631958,
|
|
"signal/brier_reward/centered_abs_mean": 0.18118281662464142,
|
|
"signal/brier_reward/group_std_mean": 0.22774460315704345,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20505461990833282,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01811828128993511,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031979148462414744,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05420147180557251,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03622420057654381,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031979148741811516,
|
|
"signal/format_reward/centered_abs_mean": 0.017957899160683154,
|
|
"signal/format_reward/group_std_mean": 0.037631581723690036,
|
|
"signal/format_reward/group_zero_std_frac": 0.830555546283722,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10187934935092927,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008978949580341577,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21007080078125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.27531993985176084,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034024206921458244,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030040125828236343,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21007080078125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27531993985176084,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034024206921458244,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030040125828236343,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21007080078125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27531993985176084,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.034024206921458244,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030040125828236343,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21007080078125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27531993985176084,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.034024206921458244,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030040125828236343,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21007080078125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27531993985176084,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034024206921458244,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030040125828236343,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21007080078125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27531993985176084,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.034024206921458244,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030040125828236343,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21007080078125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27531993985176084,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034024206921458244,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030040125828236343,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36411572694778443,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4315321445465088,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4120087444782257,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03641157373785973,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.18507757147473192,
|
|
"eval_calibration/batch_distribution_entropy": 0.9183594624506147,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8576227601929546,
|
|
"eval_calibration/confidence_entropy": 0.5061289053377749,
|
|
"eval_calibration/coverage@0%": 0.17943548387096775,
|
|
"eval_calibration/coverage@1%": 0.17943548387096775,
|
|
"eval_calibration/coverage@10%": 0.3020833333333333,
|
|
"eval_calibration/coverage@15%": 0.4437163978494623,
|
|
"eval_calibration/coverage@20%": 0.6270161290322581,
|
|
"eval_calibration/coverage@25%": 0.8429099462365591,
|
|
"eval_calibration/coverage@30%": 0.9479166666666666,
|
|
"eval_calibration/coverage@5%": 0.17943548387096775,
|
|
"eval_calibration/ece": 0.232350307883931,
|
|
"eval_calibration/mean_confidence": 0.5651359238441985,
|
|
"eval_completions/clipped_ratio": 0.009375000000000003,
|
|
"eval_completions/max_length": 2114.1666666666665,
|
|
"eval_completions/max_terminated_length": 2114.1666666666665,
|
|
"eval_completions/mean_length": 693.6932373046875,
|
|
"eval_completions/mean_terminated_length": 700.2240702311198,
|
|
"eval_completions/min_length": 72.33333333333333,
|
|
"eval_completions/min_terminated_length": 265.6666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 98275375.0,
|
|
"eval_reward": 0.8912924925486246,
|
|
"eval_reward_std": 0.23265416423479715,
|
|
"eval_rewards/accuracy_reward": 0.6527777711550394,
|
|
"eval_rewards/brier_reward": 0.7764979799588522,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8910275399684906,
|
|
"eval_rewards/format_reward": 0.9930555522441864,
|
|
"eval_rewards/frontier_coverage_0": 0.009279087030639252,
|
|
"eval_rewards/frontier_coverage_1": 0.009279087030639252,
|
|
"eval_rewards/frontier_coverage_10": 0.009279087030639252,
|
|
"eval_rewards/frontier_coverage_15": 0.009279087030639252,
|
|
"eval_rewards/frontier_coverage_20": 0.009279087030639252,
|
|
"eval_rewards/frontier_coverage_25": 0.009279087030639252,
|
|
"eval_rewards/frontier_coverage_5": 0.009279087030639252,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9930555522441864,
|
|
"eval_runtime": 173.0155,
|
|
"eval_samples_per_second": 5.78,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4415147602558136,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4768268217643102,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9577702283859253,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2207573801279068,
|
|
"eval_signal/advantage_abs_mean": 0.8893506626288096,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2068815752863884,
|
|
"eval_signal/advantage_pre_scale_std": 0.2305774266521136,
|
|
"eval_signal/advantage_std": 0.9863962332407633,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19041885187228522,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2465388998389244,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08267416805028915,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019041885621845722,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047221081952253975,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07311302361389001,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02038925824066003,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004722108171942334,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.013454860852410397,
|
|
"eval_signal/format_reward/group_std_mean": 0.03928370991100868,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.7777777910232544,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.028587787101666134,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.006727430426205198,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.24869261930386224,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.35074693461259204,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015456531352053085,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035563044948503375,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.24869261930386224,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.35074693461259204,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015456531352053085,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035563044948503375,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.24869261930386224,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.35074693461259204,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015456531352053085,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035563044948503375,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.24869261930386224,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.35074693461259204,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015456531352053085,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035563044948503375,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.24869261930386224,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.35074693461259204,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015456531352053085,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035563044948503375,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.24869261930386224,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.35074693461259204,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015456531352053085,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035563044948503375,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.24869261930386224,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.35074693461259204,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015456531352053085,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035563044948503375,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013454860852410397,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.03928370991100868,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777777910232544,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005717557234068711,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013454861667317648,
|
|
"eval_steps_per_second": 0.035,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2572277962003569,
|
|
"calibration/batch_distribution_entropy": 0.9547779041044873,
|
|
"calibration/buffer_distribution_entropy": 0.8663927585115305,
|
|
"calibration/confidence_entropy": 0.5224142559254161,
|
|
"calibration/coverage@0%": 0.010124414417387751,
|
|
"calibration/coverage@1%": 0.010124414417387751,
|
|
"calibration/coverage@10%": 0.02617453887022441,
|
|
"calibration/coverage@15%": 0.19295365729541794,
|
|
"calibration/coverage@20%": 0.3283553771989877,
|
|
"calibration/coverage@25%": 0.5176317108323689,
|
|
"calibration/coverage@30%": 0.7306122575305988,
|
|
"calibration/coverage@5%": 0.010124414417387751,
|
|
"calibration/ece": 0.13155749677237122,
|
|
"calibration/mean_confidence": 0.5880504501565289,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009461805555555581,
|
|
"completions/max_length": 3255.2,
|
|
"completions/max_terminated_length": 3255.2,
|
|
"completions/mean_length": 727.8955688476562,
|
|
"completions/mean_terminated_length": 734.931005859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 191.4,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.0026916302740573883,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.0219,
|
|
"num_tokens": 109741308.0,
|
|
"reward": 0.9618936777114868,
|
|
"reward_std": 0.13674592971801758,
|
|
"rewards/accuracy_reward": 0.6470486044883728,
|
|
"rewards/brier_reward": 0.7674099326133728,
|
|
"rewards/confidence_uniqueness_reward": 0.9404424667358399,
|
|
"rewards/format_reward": 0.9904513955116272,
|
|
"rewards/frontier_coverage_0": 0.001309068128466606,
|
|
"rewards/frontier_coverage_1": 0.001309068128466606,
|
|
"rewards/frontier_coverage_10": 0.001309068128466606,
|
|
"rewards/frontier_coverage_15": 0.001309068128466606,
|
|
"rewards/frontier_coverage_20": 0.001309068128466606,
|
|
"rewards/frontier_coverage_25": 0.001309068128466606,
|
|
"rewards/frontier_coverage_5": 0.001309068128466606,
|
|
"rewards/frontier_entropy_batch_reward": -0.2777259886264801,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16701388657093047,
|
|
"signal/accuracy_reward/group_std_mean": 0.2208912193775177,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9170358538627624,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08350694328546523,
|
|
"signal/advantage_abs_mean": 0.7515268087387085,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10258873105049134,
|
|
"signal/advantage_pre_scale_std": 0.15661307275295258,
|
|
"signal/advantage_std": 0.9833774209022522,
|
|
"signal/brier_reward/centered_abs_mean": 0.16898567974567413,
|
|
"signal/brier_reward/group_std_mean": 0.21254501342773438,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18657754361629486,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016898567974567413,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02872337996959686,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.047750599682331085,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03172791600227356,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028723380994051693,
|
|
"signal/format_reward/centered_abs_mean": 0.016373698227107526,
|
|
"signal/format_reward/group_std_mean": 0.032994627952575684,
|
|
"signal/format_reward/group_zero_std_frac": 0.8583333373069764,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09072078242897988,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008186849113553763,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19720979034900665,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2574134826660156,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031110198795795442,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028200999833643435,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19720979034900665,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2574134826660156,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031110198795795442,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028200999833643435,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19720979034900665,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2574134826660156,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.031110198795795442,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028200999833643435,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19720979034900665,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2574134826660156,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.031110198795795442,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028200999833643435,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19720979034900665,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2574134826660156,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.031110198795795442,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028200999833643435,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19720979034900665,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2574134826660156,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031110198795795442,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028200999833643435,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19720979034900665,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2574134826660156,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031110198795795442,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028200999833643435,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3412093102931976,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41135616302490235,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3766399085521698,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034120932966470716,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27743387615755427,
|
|
"calibration/batch_distribution_entropy": 0.9583523833618678,
|
|
"calibration/buffer_distribution_entropy": 0.8804019885998695,
|
|
"calibration/confidence_entropy": 0.466344529326517,
|
|
"calibration/coverage@0%": 0.013651509291601752,
|
|
"calibration/coverage@1%": 0.013651509291601752,
|
|
"calibration/coverage@10%": 0.15538102293714737,
|
|
"calibration/coverage@15%": 0.37334421356472086,
|
|
"calibration/coverage@20%": 0.45284050566706346,
|
|
"calibration/coverage@25%": 0.5141313051302937,
|
|
"calibration/coverage@30%": 0.6727813439434129,
|
|
"calibration/coverage@5%": 0.019410671595266674,
|
|
"calibration/ece": 0.1604155313309958,
|
|
"calibration/mean_confidence": 0.5627900217322679,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01449652777777779,
|
|
"completions/max_length": 3593.0,
|
|
"completions/max_terminated_length": 3593.0,
|
|
"completions/mean_length": 727.8925415039063,
|
|
"completions/mean_terminated_length": 738.712353515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.8,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.0024423820432275534,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0326,
|
|
"num_tokens": 121223206.0,
|
|
"reward": 0.9519212007522583,
|
|
"reward_std": 0.14070754647254943,
|
|
"rewards/accuracy_reward": 0.6276041746139527,
|
|
"rewards/brier_reward": 0.762453269958496,
|
|
"rewards/confidence_uniqueness_reward": 0.9355636954307556,
|
|
"rewards/format_reward": 0.9853298425674438,
|
|
"rewards/frontier_coverage_0": 0.025408835709095003,
|
|
"rewards/frontier_coverage_1": 0.025408835709095003,
|
|
"rewards/frontier_coverage_10": 0.025408835709095003,
|
|
"rewards/frontier_coverage_15": 0.025408835709095003,
|
|
"rewards/frontier_coverage_20": 0.025408835709095003,
|
|
"rewards/frontier_coverage_25": 0.025408835709095003,
|
|
"rewards/frontier_coverage_5": 0.025408835709095003,
|
|
"rewards/frontier_entropy_batch_reward": -0.2689092069864273,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1701822906732559,
|
|
"signal/accuracy_reward/group_std_mean": 0.2256343573331833,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9568945646286011,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08509114533662795,
|
|
"signal/advantage_abs_mean": 0.7413867354393006,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10248211473226547,
|
|
"signal/advantage_pre_scale_std": 0.16330770254135132,
|
|
"signal/advantage_std": 0.9833378672599793,
|
|
"signal/brier_reward/centered_abs_mean": 0.18149828016757966,
|
|
"signal/brier_reward/group_std_mean": 0.22987159788608552,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20609477162361145,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018149828910827635,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03701090067625046,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06427684798836708,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04217044934630394,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037010901141911745,
|
|
"signal/format_reward/centered_abs_mean": 0.02552625834941864,
|
|
"signal/format_reward/group_std_mean": 0.05108080431818962,
|
|
"signal/format_reward/group_zero_std_frac": 0.7805555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.14410489052534103,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01276312917470932,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22716614007949829,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2958860158920288,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03683609813451767,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003248475771397352,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22716614007949829,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2958860158920288,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03683609813451767,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003248475771397352,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22716614007949829,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2958860158920288,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03683609813451767,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003248475771397352,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22716614007949829,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2958860158920288,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03683609813451767,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003248475771397352,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22716614007949829,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2958860158920288,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03683609813451767,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003248475771397352,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22716614007949829,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2958860158920288,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03683609813451767,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003248475771397352,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22716614007949829,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2958860158920288,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03683609813451767,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003248475771397352,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3329376816749573,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4049929976463318,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3823388457298279,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03329376950860023,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24187817445696086,
|
|
"calibration/batch_distribution_entropy": 0.9537951149289101,
|
|
"calibration/buffer_distribution_entropy": 0.895752842577633,
|
|
"calibration/confidence_entropy": 0.5066892649603131,
|
|
"calibration/coverage@0%": 0.014859234868555388,
|
|
"calibration/coverage@1%": 0.014859234868555388,
|
|
"calibration/coverage@10%": 0.1557253084507572,
|
|
"calibration/coverage@15%": 0.22767872436187964,
|
|
"calibration/coverage@20%": 0.5952973900478373,
|
|
"calibration/coverage@25%": 0.6492208361304835,
|
|
"calibration/coverage@30%": 0.7082545960594742,
|
|
"calibration/coverage@5%": 0.07725923486855539,
|
|
"calibration/ece": 0.14400525368708186,
|
|
"calibration/mean_confidence": 0.5951536858486229,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011979166666666652,
|
|
"completions/max_length": 3382.8,
|
|
"completions/max_terminated_length": 3382.8,
|
|
"completions/mean_length": 631.187939453125,
|
|
"completions/mean_terminated_length": 638.8400756835938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 141.2,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.0027344543486833572,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0313,
|
|
"num_tokens": 131588539.0,
|
|
"reward": 0.9630724668502808,
|
|
"reward_std": 0.13918745368719102,
|
|
"rewards/accuracy_reward": 0.6561631917953491,
|
|
"rewards/brier_reward": 0.7739776849746705,
|
|
"rewards/confidence_uniqueness_reward": 0.9360240459442138,
|
|
"rewards/format_reward": 0.9877604246139526,
|
|
"rewards/frontier_coverage_0": 0.011109796725213528,
|
|
"rewards/frontier_coverage_1": 0.011109796725213528,
|
|
"rewards/frontier_coverage_10": 0.011109796725213528,
|
|
"rewards/frontier_coverage_15": 0.011109796725213528,
|
|
"rewards/frontier_coverage_20": 0.011109796725213528,
|
|
"rewards/frontier_coverage_25": 0.011109796725213528,
|
|
"rewards/frontier_coverage_5": 0.011109796725213528,
|
|
"rewards/frontier_entropy_batch_reward": -0.31001612544059753,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15845811367034912,
|
|
"signal/accuracy_reward/group_std_mean": 0.21250716745853424,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9355194449424744,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07922905683517456,
|
|
"signal/advantage_abs_mean": 0.7518455505371093,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10312150418758392,
|
|
"signal/advantage_pre_scale_std": 0.1631181061267853,
|
|
"signal/advantage_std": 0.9832925438880921,
|
|
"signal/brier_reward/centered_abs_mean": 0.17200563251972198,
|
|
"signal/brier_reward/group_std_mean": 0.21544553339481354,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20448561310768126,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017200562357902526,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03373164795339108,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05614056885242462,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04000279903411865,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033731648698449137,
|
|
"signal/format_reward/centered_abs_mean": 0.02146809957921505,
|
|
"signal/format_reward/group_std_mean": 0.04147007092833519,
|
|
"signal/format_reward/group_zero_std_frac": 0.8277777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.12673527002334595,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010734049789607524,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18637515604496002,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24511989057064057,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03161940351128578,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026651647873222827,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18637515604496002,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24511989057064057,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03161940351128578,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026651647873222827,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18637515604496002,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24511989057064057,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03161940351128578,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026651647873222827,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18637515604496002,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24511989057064057,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03161940351128578,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026651647873222827,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18637515604496002,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24511989057064057,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03161940351128578,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026651647873222827,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18637515604496002,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24511989057064057,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03161940351128578,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026651647873222827,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18637515604496002,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24511989057064057,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03161940351128578,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026651647873222827,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35859541296958924,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42603600025177,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4278856158256531,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035859542340040206,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31177324033790227,
|
|
"calibration/batch_distribution_entropy": 0.9377009466084179,
|
|
"calibration/buffer_distribution_entropy": 0.9052739502005067,
|
|
"calibration/confidence_entropy": 0.4595234811538427,
|
|
"calibration/coverage@0%": 0.0125507308012776,
|
|
"calibration/coverage@1%": 0.0125507308012776,
|
|
"calibration/coverage@10%": 0.025171344836365316,
|
|
"calibration/coverage@15%": 0.17121301150303198,
|
|
"calibration/coverage@20%": 0.2777535816784706,
|
|
"calibration/coverage@25%": 0.31907901969633845,
|
|
"calibration/coverage@30%": 0.48218757232791737,
|
|
"calibration/coverage@5%": 0.0125507308012776,
|
|
"calibration/ece": 0.22044916226110992,
|
|
"calibration/mean_confidence": 0.5375950511256701,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0066840277777777905,
|
|
"completions/max_length": 3311.8,
|
|
"completions/max_terminated_length": 3311.8,
|
|
"completions/mean_length": 603.350341796875,
|
|
"completions/mean_terminated_length": 607.4075073242187,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 143.0,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.002717731287702918,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.0122,
|
|
"num_tokens": 141617279.0,
|
|
"reward": 0.9516151428222657,
|
|
"reward_std": 0.12674596905708313,
|
|
"rewards/accuracy_reward": 0.6421875,
|
|
"rewards/brier_reward": 0.7418337464332581,
|
|
"rewards/confidence_uniqueness_reward": 0.9369692325592041,
|
|
"rewards/format_reward": 0.9933159828186036,
|
|
"rewards/frontier_coverage_0": 0.0047592608723789455,
|
|
"rewards/frontier_coverage_1": 0.0047592608723789455,
|
|
"rewards/frontier_coverage_10": 0.0047592608723789455,
|
|
"rewards/frontier_coverage_15": 0.0047592608723789455,
|
|
"rewards/frontier_coverage_20": 0.0047592608723789455,
|
|
"rewards/frontier_coverage_25": 0.0047592608723789455,
|
|
"rewards/frontier_coverage_5": 0.0047592608723789455,
|
|
"rewards/frontier_entropy_batch_reward": -0.34493361711502074,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16208766996860505,
|
|
"signal/accuracy_reward/group_std_mean": 0.2136603981256485,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9817174792289733,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08104383498430252,
|
|
"signal/advantage_abs_mean": 0.7454643368721008,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09447728544473648,
|
|
"signal/advantage_pre_scale_std": 0.14597638845443725,
|
|
"signal/advantage_std": 0.9832653284072876,
|
|
"signal/brier_reward/centered_abs_mean": 0.19506115317344666,
|
|
"signal/brier_reward/group_std_mean": 0.24160505831241608,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23666558563709258,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019506115466356277,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02871289774775505,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.045819585025310514,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0351563211530447,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002871289849281311,
|
|
"signal/format_reward/centered_abs_mean": 0.01219075545668602,
|
|
"signal/format_reward/group_std_mean": 0.026185811311006547,
|
|
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07440270856022835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00609537772834301,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2429557830095291,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.31135170757770536,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04189819991588593,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034742677584290505,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2429557830095291,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.31135170757770536,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04189819991588593,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034742677584290505,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2429557830095291,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.31135170757770536,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04189819991588593,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034742677584290505,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2429557830095291,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.31135170757770536,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04189819991588593,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034742677584290505,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2429557830095291,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.31135170757770536,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04189819991588593,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034742677584290505,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2429557830095291,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.31135170757770536,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04189819991588593,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034742677584290505,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2429557830095291,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.31135170757770536,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04189819991588593,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034742677584290505,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3646996796131134,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4328969597816467,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4460917890071869,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03646996915340424,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3101440805164673,
|
|
"calibration/batch_distribution_entropy": 0.9050061498877229,
|
|
"calibration/buffer_distribution_entropy": 0.9116176363911908,
|
|
"calibration/confidence_entropy": 0.5082916114231081,
|
|
"calibration/coverage@0%": 0.00576069634986306,
|
|
"calibration/coverage@1%": 0.00576069634986306,
|
|
"calibration/coverage@10%": 0.00576069634986306,
|
|
"calibration/coverage@15%": 0.18960560085429345,
|
|
"calibration/coverage@20%": 0.283362673405381,
|
|
"calibration/coverage@25%": 0.4146926660345467,
|
|
"calibration/coverage@30%": 0.41784227233375937,
|
|
"calibration/coverage@5%": 0.00576069634986306,
|
|
"calibration/ece": 0.197117675594798,
|
|
"calibration/mean_confidence": 0.661174791007752,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.004861111111111138,
|
|
"completions/max_length": 2847.6,
|
|
"completions/max_terminated_length": 2847.6,
|
|
"completions/mean_length": 612.6124267578125,
|
|
"completions/mean_terminated_length": 615.6407348632813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.8,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.0024783292319625616,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0033,
|
|
"num_tokens": 151739470.0,
|
|
"reward": 0.966460108757019,
|
|
"reward_std": 0.13642458617687225,
|
|
"rewards/accuracy_reward": 0.6873263835906982,
|
|
"rewards/brier_reward": 0.7391559958457947,
|
|
"rewards/confidence_uniqueness_reward": 0.9389071345329285,
|
|
"rewards/format_reward": 0.9947048544883728,
|
|
"rewards/frontier_coverage_0": -0.0546910285949707,
|
|
"rewards/frontier_coverage_1": -0.0546910285949707,
|
|
"rewards/frontier_coverage_10": -0.0546910285949707,
|
|
"rewards/frontier_coverage_15": -0.0546910285949707,
|
|
"rewards/frontier_coverage_20": -0.0546910285949707,
|
|
"rewards/frontier_coverage_25": -0.0546910285949707,
|
|
"rewards/frontier_coverage_5": -0.0546910285949707,
|
|
"rewards/frontier_entropy_batch_reward": -0.36887272596359255,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15999349057674409,
|
|
"signal/accuracy_reward/group_std_mean": 0.20950167179107665,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9744468212127686,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07999674528837204,
|
|
"signal/advantage_abs_mean": 0.7634802103042603,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10433225780725479,
|
|
"signal/advantage_pre_scale_std": 0.15965070724487304,
|
|
"signal/advantage_std": 0.9832550525665283,
|
|
"signal/brier_reward/centered_abs_mean": 0.19047823250293733,
|
|
"signal/brier_reward/group_std_mean": 0.2352720856666565,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23357610106468202,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019047823548316956,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02479529082775116,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04085197448730469,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030193888396024705,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002479529147967696,
|
|
"signal/format_reward/centered_abs_mean": 0.009879557183012366,
|
|
"signal/format_reward/group_std_mean": 0.022797855362296105,
|
|
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05883842520415783,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004939778591506183,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16593956649303437,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2201917886734009,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02901824899017811,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023729358334094288,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16593956649303437,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2201917886734009,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02901824899017811,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023729358334094288,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16593956649303437,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2201917886734009,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02901824899017811,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023729358334094288,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16593956649303437,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2201917886734009,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02901824899017811,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023729358334094288,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16593956649303437,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2201917886734009,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02901824899017811,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023729358334094288,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16593956649303437,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2201917886734009,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02901824899017811,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023729358334094288,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16593956649303437,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2201917886734009,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02901824899017811,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023729358334094288,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3872749865055084,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44775003790855405,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4765858590602875,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038727499544620514,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24882971050558633,
|
|
"calibration/batch_distribution_entropy": 0.9163427844868991,
|
|
"calibration/buffer_distribution_entropy": 0.9156692618801193,
|
|
"calibration/confidence_entropy": 0.5399646911523172,
|
|
"calibration/coverage@0%": 0.0041666666666666675,
|
|
"calibration/coverage@1%": 0.0041666666666666675,
|
|
"calibration/coverage@10%": 0.01832759186351706,
|
|
"calibration/coverage@15%": 0.2557291666666667,
|
|
"calibration/coverage@20%": 0.40364583333333337,
|
|
"calibration/coverage@25%": 0.6344480340606008,
|
|
"calibration/coverage@30%": 0.7081002920035939,
|
|
"calibration/coverage@5%": 0.0041666666666666675,
|
|
"calibration/ece": 0.1941888822566929,
|
|
"calibration/mean_confidence": 0.6331814034577334,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00868055555555558,
|
|
"completions/max_length": 3604.4,
|
|
"completions/max_terminated_length": 3604.4,
|
|
"completions/mean_length": 645.2096435546875,
|
|
"completions/mean_terminated_length": 650.9556030273437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 125.2,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.002496064407750964,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0084,
|
|
"num_tokens": 162225565.0,
|
|
"reward": 0.9505057215690613,
|
|
"reward_std": 0.1393027275800705,
|
|
"rewards/accuracy_reward": 0.6585069298744202,
|
|
"rewards/brier_reward": 0.729674780368805,
|
|
"rewards/confidence_uniqueness_reward": 0.9360662698745728,
|
|
"rewards/format_reward": 0.9909722208976746,
|
|
"rewards/frontier_coverage_0": -0.051046742522157726,
|
|
"rewards/frontier_coverage_1": -0.051046742522157726,
|
|
"rewards/frontier_coverage_10": -0.051046742522157726,
|
|
"rewards/frontier_coverage_15": -0.051046742522157726,
|
|
"rewards/frontier_coverage_20": -0.051046742522157726,
|
|
"rewards/frontier_coverage_25": -0.051046742522157726,
|
|
"rewards/frontier_coverage_5": -0.051046742522157726,
|
|
"rewards/frontier_entropy_batch_reward": -0.35698198080062865,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1613064229488373,
|
|
"signal/accuracy_reward/group_std_mean": 0.21208280324935913,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40555556416511535,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9074809789657593,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08065321147441865,
|
|
"signal/advantage_abs_mean": 0.7607282400131226,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10646351128816604,
|
|
"signal/advantage_pre_scale_std": 0.16107785999774932,
|
|
"signal/advantage_std": 0.9833507299423218,
|
|
"signal/brier_reward/centered_abs_mean": 0.18236831128597258,
|
|
"signal/brier_reward/group_std_mean": 0.22466041147708893,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20613384544849395,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018236831203103064,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026830673590302466,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04254492111504078,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030164846032857896,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002683067345060408,
|
|
"signal/format_reward/centered_abs_mean": 0.013585069729015232,
|
|
"signal/format_reward/group_std_mean": 0.026397685706615447,
|
|
"signal/format_reward/group_zero_std_frac": 0.8833333492279053,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07591437287628651,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006792534864507616,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16189261376857758,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21290515959262848,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02617349661886692,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023150643799453976,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16189261376857758,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21290515959262848,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02617349661886692,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023150643799453976,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16189261376857758,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21290515959262848,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02617349661886692,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023150643799453976,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16189261376857758,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21290515959262848,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02617349661886692,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023150643799453976,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16189261376857758,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21290515959262848,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02617349661886692,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023150643799453976,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16189261376857758,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21290515959262848,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02617349661886692,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023150643799453976,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16189261376857758,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21290515959262848,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02617349661886692,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023150643799453976,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37562611103057864,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4406170785427094,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4253277540206909,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03756261095404625,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3014076575162675,
|
|
"calibration/batch_distribution_entropy": 0.9125055866858618,
|
|
"calibration/buffer_distribution_entropy": 0.9188331792178456,
|
|
"calibration/confidence_entropy": 0.5207189815765981,
|
|
"calibration/coverage@0%": 0.00841005981688481,
|
|
"calibration/coverage@1%": 0.00841005981688481,
|
|
"calibration/coverage@10%": 0.00998486296649111,
|
|
"calibration/coverage@15%": 0.014755210586203521,
|
|
"calibration/coverage@20%": 0.032083614827450556,
|
|
"calibration/coverage@25%": 0.3209187626504969,
|
|
"calibration/coverage@30%": 0.607871104717331,
|
|
"calibration/coverage@5%": 0.00841005981688481,
|
|
"calibration/ece": 0.16054550562218886,
|
|
"calibration/mean_confidence": 0.6530601834127058,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006944444444444442,
|
|
"completions/max_length": 3695.8,
|
|
"completions/max_terminated_length": 3695.8,
|
|
"completions/mean_length": 648.8045166015625,
|
|
"completions/mean_terminated_length": 653.3523803710938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 149.6,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.002358554396778345,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.0216,
|
|
"num_tokens": 172786993.0,
|
|
"reward": 0.9661161661148071,
|
|
"reward_std": 0.13587609827518463,
|
|
"rewards/accuracy_reward": 0.6900173425674438,
|
|
"rewards/brier_reward": 0.7510282516479492,
|
|
"rewards/confidence_uniqueness_reward": 0.9363226532936096,
|
|
"rewards/format_reward": 0.992881965637207,
|
|
"rewards/frontier_coverage_0": -0.04805287569761276,
|
|
"rewards/frontier_coverage_1": -0.04805287569761276,
|
|
"rewards/frontier_coverage_10": -0.04805287569761276,
|
|
"rewards/frontier_coverage_15": -0.04805287569761276,
|
|
"rewards/frontier_coverage_20": -0.04805287569761276,
|
|
"rewards/frontier_coverage_25": -0.04805287569761276,
|
|
"rewards/frontier_coverage_5": -0.04805287569761276,
|
|
"rewards/frontier_entropy_batch_reward": -0.39258493185043336,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1508843332529068,
|
|
"signal/accuracy_reward/group_std_mean": 0.20189307630062103,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9263910770416259,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0754421666264534,
|
|
"signal/advantage_abs_mean": 0.7528648376464844,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10168863832950592,
|
|
"signal/advantage_pre_scale_std": 0.1590551733970642,
|
|
"signal/advantage_std": 0.9832520723342896,
|
|
"signal/brier_reward/centered_abs_mean": 0.17436771094799042,
|
|
"signal/brier_reward/group_std_mean": 0.2172168791294098,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21471179723739625,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017436770349740983,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02653498910367489,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.045722561329603194,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032590895891189575,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026534989941865207,
|
|
"signal/format_reward/centered_abs_mean": 0.013118489645421505,
|
|
"signal/format_reward/group_std_mean": 0.029562078043818475,
|
|
"signal/format_reward/group_zero_std_frac": 0.8611111164093017,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08024730533361435,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0065592448227107525,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14816038608551024,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19712282717227936,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.026096120849251746,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002118693618103862,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14816038608551024,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19712282717227936,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.026096120849251746,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002118693618103862,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14816038608551024,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19712282717227936,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.026096120849251746,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002118693618103862,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14816038608551024,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19712282717227936,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026096120849251746,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002118693618103862,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14816038608551024,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19712282717227936,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026096120849251746,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002118693618103862,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14816038608551024,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.19712282717227936,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026096120849251746,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002118693618103862,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14816038608551024,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19712282717227936,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026096120849251746,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002118693618103862,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3885017096996307,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44662662744522097,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4786907732486725,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03885017111897469,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2280682511148271,
|
|
"calibration/batch_distribution_entropy": 0.9191445965224319,
|
|
"calibration/buffer_distribution_entropy": 0.9203928575702902,
|
|
"calibration/confidence_entropy": 0.5090574346056453,
|
|
"calibration/coverage@0%": 0.00573603781882146,
|
|
"calibration/coverage@1%": 0.00573603781882146,
|
|
"calibration/coverage@10%": 0.055215204485488126,
|
|
"calibration/coverage@15%": 0.3963610378188215,
|
|
"calibration/coverage@20%": 0.5604235378188214,
|
|
"calibration/coverage@25%": 0.6255277044854881,
|
|
"calibration/coverage@30%": 0.7142562664907651,
|
|
"calibration/coverage@5%": 0.01667353781882146,
|
|
"calibration/ece": 0.1746891027457266,
|
|
"calibration/mean_confidence": 0.652321831845221,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00512152777777779,
|
|
"completions/max_length": 3058.2,
|
|
"completions/max_terminated_length": 3058.2,
|
|
"completions/mean_length": 614.4275146484375,
|
|
"completions/mean_terminated_length": 617.5863159179687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 145.4,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.002601947635412216,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.0059,
|
|
"num_tokens": 182933870.0,
|
|
"reward": 0.9689822554588318,
|
|
"reward_std": 0.13325872272253036,
|
|
"rewards/accuracy_reward": 0.684375,
|
|
"rewards/brier_reward": 0.7553081393241883,
|
|
"rewards/confidence_uniqueness_reward": 0.9397584915161132,
|
|
"rewards/format_reward": 0.9948784708976746,
|
|
"rewards/frontier_coverage_0": -0.039187131077051164,
|
|
"rewards/frontier_coverage_1": -0.039187131077051164,
|
|
"rewards/frontier_coverage_10": -0.039187131077051164,
|
|
"rewards/frontier_coverage_15": -0.039187131077051164,
|
|
"rewards/frontier_coverage_20": -0.039187131077051164,
|
|
"rewards/frontier_coverage_25": -0.039187131077051164,
|
|
"rewards/frontier_coverage_5": -0.039187131077051164,
|
|
"rewards/frontier_entropy_batch_reward": -0.3622850239276886,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1550998270511627,
|
|
"signal/accuracy_reward/group_std_mean": 0.20906379520893098,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9399829387664795,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07754991352558135,
|
|
"signal/advantage_abs_mean": 0.7509470582008362,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10027577131986617,
|
|
"signal/advantage_pre_scale_std": 0.15472148954868317,
|
|
"signal/advantage_std": 0.9832675933837891,
|
|
"signal/brier_reward/centered_abs_mean": 0.18083776235580445,
|
|
"signal/brier_reward/group_std_mean": 0.22394680380821227,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21995324194431304,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018083777278661728,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024045027419924737,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03864929303526878,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029247282445430754,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002404502872377634,
|
|
"signal/format_reward/centered_abs_mean": 0.009467230830341577,
|
|
"signal/format_reward/group_std_mean": 0.02073230631649494,
|
|
"signal/format_reward/group_zero_std_frac": 0.9055555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05722929909825325,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004733615415170788,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16228995025157927,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21590131521224976,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02817150242626667,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023207463324069976,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16228995025157927,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21590131521224976,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02817150242626667,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023207463324069976,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16228995025157927,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21590131521224976,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02817150242626667,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023207463324069976,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16228995025157927,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21590131521224976,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02817150242626667,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023207463324069976,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16228995025157927,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21590131521224976,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02817150242626667,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023207463324069976,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16228995025157927,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21590131521224976,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02817150242626667,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023207463324069976,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16228995025157927,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21590131521224976,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02817150242626667,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023207463324069976,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38060142397880553,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44278682470321656,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46376983523368837,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03806014358997345,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2656779618361436,
|
|
"calibration/batch_distribution_entropy": 0.921673087913151,
|
|
"calibration/buffer_distribution_entropy": 0.9221058711743622,
|
|
"calibration/confidence_entropy": 0.5219745548320699,
|
|
"calibration/coverage@0%": 0.003655373839760502,
|
|
"calibration/coverage@1%": 0.003655373839760502,
|
|
"calibration/coverage@10%": 0.10779749862161041,
|
|
"calibration/coverage@15%": 0.37434690961637485,
|
|
"calibration/coverage@20%": 0.4020928738397605,
|
|
"calibration/coverage@25%": 0.5248110125353711,
|
|
"calibration/coverage@30%": 0.6770166637048378,
|
|
"calibration/coverage@5%": 0.003655373839760502,
|
|
"calibration/ece": 0.1682618991312315,
|
|
"calibration/mean_confidence": 0.6443340086406893,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.003993055555555558,
|
|
"completions/max_length": 3106.8,
|
|
"completions/max_terminated_length": 3106.8,
|
|
"completions/mean_length": 624.9882690429688,
|
|
"completions/mean_terminated_length": 627.4920532226563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 133.0,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 0.0027543448377400637,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0072,
|
|
"num_tokens": 193225415.0,
|
|
"reward": 0.9632153868675232,
|
|
"reward_std": 0.12627761960029601,
|
|
"rewards/accuracy_reward": 0.6706597208976746,
|
|
"rewards/brier_reward": 0.7587080836296082,
|
|
"rewards/confidence_uniqueness_reward": 0.9400440454483032,
|
|
"rewards/format_reward": 0.9953993082046508,
|
|
"rewards/frontier_coverage_0": -0.027446018159389497,
|
|
"rewards/frontier_coverage_1": -0.027446018159389497,
|
|
"rewards/frontier_coverage_10": -0.027446018159389497,
|
|
"rewards/frontier_coverage_15": -0.027446018159389497,
|
|
"rewards/frontier_coverage_20": -0.027446018159389497,
|
|
"rewards/frontier_coverage_25": -0.027446018159389497,
|
|
"rewards/frontier_coverage_5": -0.027446018159389497,
|
|
"rewards/frontier_entropy_batch_reward": -0.3694201588630676,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13590494990348817,
|
|
"signal/accuracy_reward/group_std_mean": 0.1884896844625473,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8468737006187439,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06795247495174409,
|
|
"signal/advantage_abs_mean": 0.752357542514801,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09376581460237503,
|
|
"signal/advantage_pre_scale_std": 0.1467900037765503,
|
|
"signal/advantage_std": 0.983231246471405,
|
|
"signal/brier_reward/centered_abs_mean": 0.1709260106086731,
|
|
"signal/brier_reward/group_std_mean": 0.21340954005718232,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21400478780269622,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017092601954936983,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022889725863933563,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.037352363020181654,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02867819517850876,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022889725398272274,
|
|
"signal/format_reward/centered_abs_mean": 0.008599175233393907,
|
|
"signal/format_reward/group_std_mean": 0.019867047667503357,
|
|
"signal/format_reward/group_zero_std_frac": 0.9055555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.053667180240154266,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004299587616696954,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15505702197551727,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2044772982597351,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.027795213833451272,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022173153702169657,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15505702197551727,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2044772982597351,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.027795213833451272,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022173153702169657,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15505702197551727,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2044772982597351,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027795213833451272,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022173153702169657,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15505702197551727,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2044772982597351,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027795213833451272,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022173153702169657,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15505702197551727,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2044772982597351,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027795213833451272,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022173153702169657,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15505702197551727,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2044772982597351,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027795213833451272,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022173153702169657,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15505702197551727,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2044772982597351,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.027795213833451272,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022173153702169657,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38333263993263245,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4431163430213928,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48075162172317504,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038333263248205185,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20759311463380917,
|
|
"calibration/batch_distribution_entropy": 0.9477314132460055,
|
|
"calibration/buffer_distribution_entropy": 0.9242834024494486,
|
|
"calibration/confidence_entropy": 0.5270175050710435,
|
|
"calibration/coverage@0%": 0.009675166218401008,
|
|
"calibration/coverage@1%": 0.009675166218401008,
|
|
"calibration/coverage@10%": 0.0938530992208724,
|
|
"calibration/coverage@15%": 0.26268486014223075,
|
|
"calibration/coverage@20%": 0.616002402139018,
|
|
"calibration/coverage@25%": 0.7508021390374331,
|
|
"calibration/coverage@30%": 0.9032085561497327,
|
|
"calibration/coverage@5%": 0.009675166218401008,
|
|
"calibration/ece": 0.1630102667152334,
|
|
"calibration/mean_confidence": 0.6048623992059596,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.025954861111111116,
|
|
"completions/max_length": 3420.6,
|
|
"completions/max_terminated_length": 3420.6,
|
|
"completions/mean_length": 632.4453002929688,
|
|
"completions/mean_terminated_length": 649.3080322265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 167.6,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.002470463514328003,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0635,
|
|
"num_tokens": 203610257.0,
|
|
"reward": 0.9558441638946533,
|
|
"reward_std": 0.1599712163209915,
|
|
"rewards/accuracy_reward": 0.6660590291023254,
|
|
"rewards/brier_reward": 0.7662190675735474,
|
|
"rewards/confidence_uniqueness_reward": 0.9209245562553405,
|
|
"rewards/format_reward": 0.9719617962837219,
|
|
"rewards/frontier_coverage_0": -0.00996593926101923,
|
|
"rewards/frontier_coverage_1": -0.00996593926101923,
|
|
"rewards/frontier_coverage_10": -0.00996593926101923,
|
|
"rewards/frontier_coverage_15": -0.00996593926101923,
|
|
"rewards/frontier_coverage_20": -0.00996593926101923,
|
|
"rewards/frontier_coverage_25": -0.00996593926101923,
|
|
"rewards/frontier_coverage_5": -0.00996593926101923,
|
|
"rewards/frontier_entropy_batch_reward": -0.3088305056095123,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1613226979970932,
|
|
"signal/accuracy_reward/group_std_mean": 0.2118624597787857,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.397222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9061164379119873,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0806613489985466,
|
|
"signal/advantage_abs_mean": 0.7433346390724183,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11672266870737076,
|
|
"signal/advantage_pre_scale_std": 0.19095246195793153,
|
|
"signal/advantage_std": 0.9833595633506775,
|
|
"signal/brier_reward/centered_abs_mean": 0.17435405254364014,
|
|
"signal/brier_reward/group_std_mean": 0.22024931907653808,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19577785432338715,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017435405775904654,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05343219414353371,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0928901955485344,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05993582606315613,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005343219451606274,
|
|
"signal/format_reward/centered_abs_mean": 0.04528537318110466,
|
|
"signal/format_reward/group_std_mean": 0.08404082655906678,
|
|
"signal/format_reward/group_zero_std_frac": 0.6611111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.25386848151683805,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.02264268659055233,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16983620524406434,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22350181639194489,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02728012129664421,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024286577478051185,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16983620524406434,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22350181639194489,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02728012129664421,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024286577478051185,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16983620524406434,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22350181639194489,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02728012129664421,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024286577478051185,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16983620524406434,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22350181639194489,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02728012129664421,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024286577478051185,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16983620524406434,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22350181639194489,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02728012129664421,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024286577478051185,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16983620524406434,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22350181639194489,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02728012129664421,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024286577478051185,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16983620524406434,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22350181639194489,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02728012129664421,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024286577478051185,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3497317969799042,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4170763075351715,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.39279434084892273,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034973180294036864,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 0.163130081233984,
|
|
"eval_calibration/batch_distribution_entropy": 0.9014365191445948,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9264574248539187,
|
|
"eval_calibration/confidence_entropy": 0.5035177426390464,
|
|
"eval_calibration/coverage@0%": 0.15947580645161288,
|
|
"eval_calibration/coverage@1%": 0.15947580645161288,
|
|
"eval_calibration/coverage@10%": 0.3776993727598566,
|
|
"eval_calibration/coverage@15%": 0.4990255376344086,
|
|
"eval_calibration/coverage@20%": 0.717909946236559,
|
|
"eval_calibration/coverage@25%": 0.8870967741935484,
|
|
"eval_calibration/coverage@30%": 0.9946236559139785,
|
|
"eval_calibration/coverage@5%": 0.24801747311827957,
|
|
"eval_calibration/ece": 0.272578609146035,
|
|
"eval_calibration/mean_confidence": 0.6127355205524417,
|
|
"eval_completions/clipped_ratio": 0.024131944444444442,
|
|
"eval_completions/max_length": 2405.1666666666665,
|
|
"eval_completions/max_terminated_length": 2405.1666666666665,
|
|
"eval_completions/mean_length": 635.5255432128906,
|
|
"eval_completions/mean_terminated_length": 651.1894124348959,
|
|
"eval_completions/min_length": 0.0,
|
|
"eval_completions/min_terminated_length": 199.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 203610257.0,
|
|
"eval_reward": 0.8911056915918986,
|
|
"eval_reward_std": 0.2602160597840945,
|
|
"eval_rewards/accuracy_reward": 0.6770833333333334,
|
|
"eval_rewards/brier_reward": 0.7757821977138519,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8653644323348999,
|
|
"eval_rewards/format_reward": 0.9722222089767456,
|
|
"eval_rewards/frontier_coverage_0": -0.004391265024120609,
|
|
"eval_rewards/frontier_coverage_1": -0.004391265024120609,
|
|
"eval_rewards/frontier_coverage_10": -0.004391265024120609,
|
|
"eval_rewards/frontier_coverage_15": -0.004391265024120609,
|
|
"eval_rewards/frontier_coverage_20": -0.004391265024120609,
|
|
"eval_rewards/frontier_coverage_25": -0.004391265024120609,
|
|
"eval_rewards/frontier_coverage_5": -0.004391265024120609,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9722222089767456,
|
|
"eval_runtime": 207.7682,
|
|
"eval_samples_per_second": 4.813,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4248046825329463,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4674356331427892,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8317528963088989,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21240234126647314,
|
|
"eval_signal/advantage_abs_mean": 0.8401626845200857,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21814856926600137,
|
|
"eval_signal/advantage_pre_scale_std": 0.2586393654346466,
|
|
"eval_signal/advantage_std": 0.9864379862944285,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21349711219469705,
|
|
"eval_signal/brier_reward/group_std_mean": 0.27180638660987216,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0832139253616333,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021349711654086907,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0751443641881148,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.14167124529679617,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02908085659146309,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0075144364188114805,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.05284288184096416,
|
|
"eval_signal/format_reward/group_std_mean": 0.1326932366937399,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.3333333407839139,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.10094406145314376,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.02642144092048208,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.21956058591604233,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.3250137319167455,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012314057908952236,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031397163790340223,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.21956058591604233,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.3250137319167455,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012314057908952236,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031397163790340223,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.21956058591604233,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.3250137319167455,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012314057908952236,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031397163790340223,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.21956058591604233,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3250137319167455,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012314057908952236,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031397163790340223,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.21956058591604233,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.3250137319167455,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012314057908952236,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031397163790340223,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.21956058591604233,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.3250137319167455,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012314057908952236,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031397163790340223,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.21956058591604233,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.3250137319167455,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012314057908952236,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031397163790340223,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05284288184096416,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.1326932366937399,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3333333407839139,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02018881356343627,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005284288238423566,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27758447836015676,
|
|
"calibration/batch_distribution_entropy": 0.9414206143630739,
|
|
"calibration/buffer_distribution_entropy": 0.927855162724946,
|
|
"calibration/confidence_entropy": 0.47734935606033496,
|
|
"calibration/coverage@0%": 0.0075463989800637786,
|
|
"calibration/coverage@1%": 0.0075463989800637786,
|
|
"calibration/coverage@10%": 0.12135855367619637,
|
|
"calibration/coverage@15%": 0.24880930719793426,
|
|
"calibration/coverage@20%": 0.33652223634634276,
|
|
"calibration/coverage@25%": 0.4669609459964935,
|
|
"calibration/coverage@30%": 0.6008762480729914,
|
|
"calibration/coverage@5%": 0.05174529400768809,
|
|
"calibration/ece": 0.13926885015949225,
|
|
"calibration/mean_confidence": 0.6254293779927222,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.025,
|
|
"completions/max_length": 3690.0,
|
|
"completions/max_terminated_length": 3690.0,
|
|
"completions/mean_length": 632.2384643554688,
|
|
"completions/mean_terminated_length": 648.3413330078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 150.0,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 0.0030207051895558834,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.07,
|
|
"num_tokens": 213970508.0,
|
|
"reward": 0.9623825907707214,
|
|
"reward_std": 0.1516391783952713,
|
|
"rewards/accuracy_reward": 0.6657986164093017,
|
|
"rewards/brier_reward": 0.7901792764663697,
|
|
"rewards/confidence_uniqueness_reward": 0.9224253416061401,
|
|
"rewards/format_reward": 0.9745659708976746,
|
|
"rewards/frontier_coverage_0": 0.02123640524223447,
|
|
"rewards/frontier_coverage_1": 0.02123640524223447,
|
|
"rewards/frontier_coverage_10": 0.02123640524223447,
|
|
"rewards/frontier_coverage_15": 0.02123640524223447,
|
|
"rewards/frontier_coverage_20": 0.02123640524223447,
|
|
"rewards/frontier_coverage_25": 0.02123640524223447,
|
|
"rewards/frontier_coverage_5": 0.02123640524223447,
|
|
"rewards/frontier_entropy_batch_reward": -0.3118593841791153,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15443793088197708,
|
|
"signal/accuracy_reward/group_std_mean": 0.20904378294944764,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9191455960273742,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07721896544098854,
|
|
"signal/advantage_abs_mean": 0.7209781050682068,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10629049986600876,
|
|
"signal/advantage_pre_scale_std": 0.1804076611995697,
|
|
"signal/advantage_std": 0.9832780361175537,
|
|
"signal/brier_reward/centered_abs_mean": 0.15626430809497832,
|
|
"signal/brier_reward/group_std_mean": 0.20239726901054383,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18771646320819854,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015626430884003638,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.052898465842008593,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09158898591995239,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06356689184904099,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005289846519008279,
|
|
"signal/format_reward/centered_abs_mean": 0.04350586049258709,
|
|
"signal/format_reward/group_std_mean": 0.08119002729654312,
|
|
"signal/format_reward/group_zero_std_frac": 0.6722222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2607091456651688,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.021752930246293545,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17601246535778045,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23166741728782653,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030123594403266906,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025169781874865294,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17601246535778045,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23166741728782653,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030123594403266906,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025169781874865294,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17601246535778045,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23166741728782653,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030123594403266906,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025169781874865294,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17601246535778045,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23166741728782653,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030123594403266906,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025169781874865294,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17601246535778045,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23166741728782653,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030123594403266906,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025169781874865294,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17601246535778045,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23166741728782653,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030123594403266906,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025169781874865294,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17601246535778045,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23166741728782653,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030123594403266906,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025169781874865294,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.334915554523468,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4050456404685974,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40600050091743467,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033491555601358414,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1623917508484302,
|
|
"calibration/batch_distribution_entropy": 0.9382471960574534,
|
|
"calibration/buffer_distribution_entropy": 0.9295560400990392,
|
|
"calibration/confidence_entropy": 0.48187896497077476,
|
|
"calibration/coverage@0%": 0.029851500799161275,
|
|
"calibration/coverage@1%": 0.029851500799161275,
|
|
"calibration/coverage@10%": 0.4238398178142801,
|
|
"calibration/coverage@15%": 0.5081844529158787,
|
|
"calibration/coverage@20%": 0.6088912176070422,
|
|
"calibration/coverage@25%": 0.8298052000748772,
|
|
"calibration/coverage@30%": 0.9122503108164111,
|
|
"calibration/coverage@5%": 0.22448740089895208,
|
|
"calibration/ece": 0.1263346756856248,
|
|
"calibration/mean_confidence": 0.6134829214978298,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010416666666666652,
|
|
"completions/max_length": 3428.0,
|
|
"completions/max_terminated_length": 3428.0,
|
|
"completions/mean_length": 629.52822265625,
|
|
"completions/mean_terminated_length": 636.1643432617187,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 141.8,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.003066908335313201,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.0232,
|
|
"num_tokens": 224331121.0,
|
|
"reward": 0.9897796273231506,
|
|
"reward_std": 0.12779354751110078,
|
|
"rewards/accuracy_reward": 0.7029513835906982,
|
|
"rewards/brier_reward": 0.7993221998214721,
|
|
"rewards/confidence_uniqueness_reward": 0.9374632716178894,
|
|
"rewards/format_reward": 0.9893229126930236,
|
|
"rewards/frontier_coverage_0": 0.0023610764765180647,
|
|
"rewards/frontier_coverage_1": 0.0023610764765180647,
|
|
"rewards/frontier_coverage_10": 0.0023610764765180647,
|
|
"rewards/frontier_coverage_15": 0.0023610764765180647,
|
|
"rewards/frontier_coverage_20": 0.0023610764765180647,
|
|
"rewards/frontier_coverage_25": 0.0023610764765180647,
|
|
"rewards/frontier_coverage_5": 0.0023610764765180647,
|
|
"rewards/frontier_entropy_batch_reward": -0.3027245044708252,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1584743946790695,
|
|
"signal/accuracy_reward/group_std_mean": 0.20634441077709198,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0239338517189025,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07923719733953476,
|
|
"signal/advantage_abs_mean": 0.7589234232902526,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0962674856185913,
|
|
"signal/advantage_pre_scale_std": 0.1529387891292572,
|
|
"signal/advantage_std": 0.9831875920295715,
|
|
"signal/brier_reward/centered_abs_mean": 0.14430948197841645,
|
|
"signal/brier_reward/group_std_mean": 0.1849027007818222,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1869402378797531,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014430948719382285,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03034689761698246,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05047857165336609,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03914179354906082,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030346899293363094,
|
|
"signal/format_reward/centered_abs_mean": 0.017876519449055196,
|
|
"signal/format_reward/group_std_mean": 0.03566114716231823,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11467134803533555,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008938259724527598,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17844413220882416,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23779484033584594,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03313328959047794,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002551751025021076,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17844413220882416,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23779484033584594,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03313328959047794,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002551751025021076,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17844413220882416,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23779484033584594,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03313328959047794,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002551751025021076,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17844413220882416,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23779484033584594,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03313328959047794,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002551751025021076,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17844413220882416,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23779484033584594,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03313328959047794,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002551751025021076,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17844413220882416,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23779484033584594,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03313328959047794,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002551751025021076,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17844413220882416,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23779484033584594,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03313328959047794,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002551751025021076,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.336598539352417,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40353216528892516,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4363815426826477,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03365985415875912,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2785963055836248,
|
|
"calibration/batch_distribution_entropy": 0.9652125750310209,
|
|
"calibration/buffer_distribution_entropy": 0.9330482648173744,
|
|
"calibration/confidence_entropy": 0.49621660057634137,
|
|
"calibration/coverage@0%": 0.007910071105482502,
|
|
"calibration/coverage@1%": 0.007910071105482502,
|
|
"calibration/coverage@10%": 0.029982246120544936,
|
|
"calibration/coverage@15%": 0.1566908143448694,
|
|
"calibration/coverage@20%": 0.3872066256148847,
|
|
"calibration/coverage@25%": 0.582606519819888,
|
|
"calibration/coverage@30%": 0.6653970694296458,
|
|
"calibration/coverage@5%": 0.007910071105482502,
|
|
"calibration/ece": 0.1682723224397847,
|
|
"calibration/mean_confidence": 0.5473267088527768,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016145833333333325,
|
|
"completions/max_length": 3463.0,
|
|
"completions/max_terminated_length": 3463.0,
|
|
"completions/mean_length": 619.2389770507813,
|
|
"completions/mean_terminated_length": 629.3917236328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 144.4,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 0.002968505723401904,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": -0.0457,
|
|
"num_tokens": 234543954.0,
|
|
"reward": 0.9655173897743226,
|
|
"reward_std": 0.13793158531188965,
|
|
"rewards/accuracy_reward": 0.6552951335906982,
|
|
"rewards/brier_reward": 0.7801418542861939,
|
|
"rewards/confidence_uniqueness_reward": 0.9340383648872376,
|
|
"rewards/format_reward": 0.9837673664093017,
|
|
"rewards/frontier_coverage_0": 0.02393667958676815,
|
|
"rewards/frontier_coverage_1": 0.02393667958676815,
|
|
"rewards/frontier_coverage_10": 0.02393667958676815,
|
|
"rewards/frontier_coverage_15": 0.02393667958676815,
|
|
"rewards/frontier_coverage_20": 0.02393667958676815,
|
|
"rewards/frontier_coverage_25": 0.02393667958676815,
|
|
"rewards/frontier_coverage_5": 0.02393667958676815,
|
|
"rewards/frontier_entropy_batch_reward": -0.27827951312065125,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16129014790058135,
|
|
"signal/accuracy_reward/group_std_mean": 0.20588865578174592,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4388889014720917,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9973422050476074,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08064507395029068,
|
|
"signal/advantage_abs_mean": 0.7537703037261962,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10260143429040909,
|
|
"signal/advantage_pre_scale_std": 0.16498699486255647,
|
|
"signal/advantage_std": 0.983244001865387,
|
|
"signal/brier_reward/centered_abs_mean": 0.16035984754562377,
|
|
"signal/brier_reward/group_std_mean": 0.20447275638580323,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19879674315452575,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01603598427027464,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037899629771709444,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06762906014919282,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04686418101191521,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037899629678577185,
|
|
"signal/format_reward/centered_abs_mean": 0.02791341170668602,
|
|
"signal/format_reward/group_std_mean": 0.05625998750329018,
|
|
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17200126945972444,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01395670585334301,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2007855713367462,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2609905391931534,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035559892654418945,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028712335973978043,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2007855713367462,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2609905391931534,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035559892654418945,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028712335973978043,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2007855713367462,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2609905391931534,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.035559892654418945,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028712335973978043,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2007855713367462,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2609905391931534,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.035559892654418945,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028712335973978043,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2007855713367462,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2609905391931534,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035559892654418945,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028712335973978043,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2007855713367462,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2609905391931534,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.035559892654418945,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028712335973978043,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2007855713367462,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2609905391931534,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035559892654418945,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028712335973978043,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.335198974609375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4032855689525604,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4162396967411041,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033519898727536204,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25985110868658695,
|
|
"calibration/batch_distribution_entropy": 0.9631548287364671,
|
|
"calibration/buffer_distribution_entropy": 0.9368432393792772,
|
|
"calibration/confidence_entropy": 0.45159534427369225,
|
|
"calibration/coverage@0%": 0.0010471275946903505,
|
|
"calibration/coverage@1%": 0.0010471275946903505,
|
|
"calibration/coverage@10%": 0.21385096429441958,
|
|
"calibration/coverage@15%": 0.39416616560362666,
|
|
"calibration/coverage@20%": 0.4899177481296418,
|
|
"calibration/coverage@25%": 0.5565116507652067,
|
|
"calibration/coverage@30%": 0.6357331500523827,
|
|
"calibration/coverage@5%": 0.02151956853957224,
|
|
"calibration/ece": 0.16906323787142602,
|
|
"calibration/mean_confidence": 0.553027232437221,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009895833333333348,
|
|
"completions/max_length": 3375.2,
|
|
"completions/max_terminated_length": 3375.2,
|
|
"completions/mean_length": 620.9748413085938,
|
|
"completions/mean_terminated_length": 627.0546997070312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 152.2,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 0.003942957613617182,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.0242,
|
|
"num_tokens": 244779440.0,
|
|
"reward": 0.9828472375869751,
|
|
"reward_std": 0.1266437292098999,
|
|
"rewards/accuracy_reward": 0.6784722208976746,
|
|
"rewards/brier_reward": 0.7945773005485535,
|
|
"rewards/confidence_uniqueness_reward": 0.9401492238044739,
|
|
"rewards/format_reward": 0.9900173664093017,
|
|
"rewards/frontier_coverage_0": 0.02642001286149025,
|
|
"rewards/frontier_coverage_1": 0.02642001286149025,
|
|
"rewards/frontier_coverage_10": 0.02642001286149025,
|
|
"rewards/frontier_coverage_15": 0.02642001286149025,
|
|
"rewards/frontier_coverage_20": 0.02642001286149025,
|
|
"rewards/frontier_coverage_25": 0.02642001286149025,
|
|
"rewards/frontier_coverage_5": 0.02642001286149025,
|
|
"rewards/frontier_entropy_batch_reward": -0.2751484811306,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15174696147441863,
|
|
"signal/accuracy_reward/group_std_mean": 0.2080085426568985,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9443390369415283,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07587348073720931,
|
|
"signal/advantage_abs_mean": 0.7379367828369141,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0910405844449997,
|
|
"signal/advantage_pre_scale_std": 0.14757494032382965,
|
|
"signal/advantage_std": 0.9832143902778625,
|
|
"signal/brier_reward/centered_abs_mean": 0.16116996705532075,
|
|
"signal/brier_reward/group_std_mean": 0.20735826790332795,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20363759398460388,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016116996854543687,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03047032840549946,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05480174720287323,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03763532117009163,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003047033119946718,
|
|
"signal/format_reward/centered_abs_mean": 0.01847330704331398,
|
|
"signal/format_reward/group_std_mean": 0.040765970945358276,
|
|
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11124600917100906,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00923665352165699,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20754149556159973,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2732445240020752,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03756205141544342,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002967843320220709,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20754149556159973,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2732445240020752,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03756205141544342,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002967843320220709,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20754149556159973,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2732445240020752,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03756205141544342,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002967843320220709,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20754149556159973,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2732445240020752,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03756205141544342,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002967843320220709,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20754149556159973,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2732445240020752,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03756205141544342,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002967843320220709,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20754149556159973,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2732445240020752,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03756205141544342,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002967843320220709,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20754149556159973,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2732445240020752,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03756205141544342,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002967843320220709,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3402287781238556,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41207742094993594,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4308716356754303,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0340228796005249,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1702944249457136,
|
|
"calibration/batch_distribution_entropy": 0.9474281075428994,
|
|
"calibration/buffer_distribution_entropy": 0.9399565674518694,
|
|
"calibration/confidence_entropy": 0.4981072863512715,
|
|
"calibration/coverage@0%": 0.10222342219869385,
|
|
"calibration/coverage@1%": 0.18714101358250584,
|
|
"calibration/coverage@10%": 0.352484518804438,
|
|
"calibration/coverage@15%": 0.38542244882575055,
|
|
"calibration/coverage@20%": 0.5395064520329215,
|
|
"calibration/coverage@25%": 0.7593703528573289,
|
|
"calibration/coverage@30%": 0.8751834871815959,
|
|
"calibration/coverage@5%": 0.306036520545082,
|
|
"calibration/ece": 0.16359838991861636,
|
|
"calibration/mean_confidence": 0.5928311777908328,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.005468750000000022,
|
|
"completions/max_length": 2949.2,
|
|
"completions/max_terminated_length": 2949.2,
|
|
"completions/mean_length": 635.7155395507813,
|
|
"completions/mean_terminated_length": 639.208154296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 139.6,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 0.0034135030582547188,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0155,
|
|
"num_tokens": 255220547.0,
|
|
"reward": 0.9929954290390015,
|
|
"reward_std": 0.12329381704330444,
|
|
"rewards/accuracy_reward": 0.6967013955116272,
|
|
"rewards/brier_reward": 0.8013178825378418,
|
|
"rewards/confidence_uniqueness_reward": 0.9443058967590332,
|
|
"rewards/format_reward": 0.9944444298744202,
|
|
"rewards/frontier_coverage_0": 0.01054713288322091,
|
|
"rewards/frontier_coverage_1": 0.01054713288322091,
|
|
"rewards/frontier_coverage_10": 0.01054713288322091,
|
|
"rewards/frontier_coverage_15": 0.01054713288322091,
|
|
"rewards/frontier_coverage_20": 0.01054713288322091,
|
|
"rewards/frontier_coverage_25": 0.01054713288322091,
|
|
"rewards/frontier_coverage_5": 0.01054713288322091,
|
|
"rewards/frontier_entropy_batch_reward": -0.2819568753242493,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15746527910232544,
|
|
"signal/accuracy_reward/group_std_mean": 0.2058452069759369,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4194444537162781,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0113989472389222,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07873263955116272,
|
|
"signal/advantage_abs_mean": 0.7568188905715942,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0927268460392952,
|
|
"signal/advantage_pre_scale_std": 0.14396594166755677,
|
|
"signal/advantage_std": 0.9831969380378723,
|
|
"signal/brier_reward/centered_abs_mean": 0.14777444005012513,
|
|
"signal/brier_reward/group_std_mean": 0.18979325294494628,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18979544341564178,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014777444303035736,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022845935076475143,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03979781419038773,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029593577980995177,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002284593554213643,
|
|
"signal/format_reward/centered_abs_mean": 0.0103624127805233,
|
|
"signal/format_reward/group_std_mean": 0.024810751900076866,
|
|
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0675680547952652,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00518120639026165,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19016571938991547,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24880056381225585,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0349818117916584,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002719369810074568,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19016571938991547,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24880056381225585,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0349818117916584,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002719369810074568,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19016571938991547,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24880056381225585,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0349818117916584,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002719369810074568,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19016571938991547,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24880056381225585,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0349818117916584,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002719369810074568,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19016571938991547,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24880056381225585,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0349818117916584,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002719369810074568,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19016571938991547,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24880056381225585,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0349818117916584,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002719369810074568,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19016571938991547,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24880056381225585,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0349818117916584,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002719369810074568,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3356120824813843,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4034553825855255,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4321089446544647,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03356120809912681,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21974228957333825,
|
|
"calibration/batch_distribution_entropy": 0.957767837624923,
|
|
"calibration/buffer_distribution_entropy": 0.9414282291526312,
|
|
"calibration/confidence_entropy": 0.4740419511530939,
|
|
"calibration/coverage@0%": 0.005249582744674368,
|
|
"calibration/coverage@1%": 0.005249582744674368,
|
|
"calibration/coverage@10%": 0.22161801602834014,
|
|
"calibration/coverage@15%": 0.2912255509209624,
|
|
"calibration/coverage@20%": 0.5404524177537375,
|
|
"calibration/coverage@25%": 0.7363437705207907,
|
|
"calibration/coverage@30%": 0.8552729630880902,
|
|
"calibration/coverage@5%": 0.052246971778616924,
|
|
"calibration/ece": 0.13019231823487484,
|
|
"calibration/mean_confidence": 0.5658447997080991,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009722222222222233,
|
|
"completions/max_length": 3547.8,
|
|
"completions/max_terminated_length": 3547.8,
|
|
"completions/mean_length": 649.3093017578125,
|
|
"completions/mean_terminated_length": 655.8386352539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 140.2,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 0.0037228513974696398,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.0148,
|
|
"num_tokens": 265825390.0,
|
|
"reward": 0.9755982637405396,
|
|
"reward_std": 0.13257428556680678,
|
|
"rewards/accuracy_reward": 0.6615451335906982,
|
|
"rewards/brier_reward": 0.7983024001121521,
|
|
"rewards/confidence_uniqueness_reward": 0.9400404095649719,
|
|
"rewards/format_reward": 0.9899305462837219,
|
|
"rewards/frontier_coverage_0": 0.036238094815053044,
|
|
"rewards/frontier_coverage_1": 0.036238094815053044,
|
|
"rewards/frontier_coverage_10": 0.036238094815053044,
|
|
"rewards/frontier_coverage_15": 0.036238094815053044,
|
|
"rewards/frontier_coverage_20": 0.036238094815053044,
|
|
"rewards/frontier_coverage_25": 0.036238094815053044,
|
|
"rewards/frontier_coverage_5": 0.036238094815053044,
|
|
"rewards/frontier_entropy_batch_reward": -0.2760128676891327,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18161349892616271,
|
|
"signal/accuracy_reward/group_std_mean": 0.23401132524013518,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3555555582046509,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.1416666626930236,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09080674946308136,
|
|
"signal/advantage_abs_mean": 0.764301085472107,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10157442539930343,
|
|
"signal/advantage_pre_scale_std": 0.1548332154750824,
|
|
"signal/advantage_std": 0.9832197904586792,
|
|
"signal/brier_reward/centered_abs_mean": 0.1567206412553787,
|
|
"signal/brier_reward/group_std_mean": 0.1994690865278244,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1979391247034073,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01567206475883722,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02809174992144108,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04275588467717171,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03521875329315662,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028091749642044305,
|
|
"signal/format_reward/centered_abs_mean": 0.015679253730922937,
|
|
"signal/format_reward/group_std_mean": 0.027723340317606926,
|
|
"signal/format_reward/group_zero_std_frac": 0.8888888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09744075834751129,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007839626865461469,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2045228362083435,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2676228523254395,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0369983471930027,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002924676425755024,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2045228362083435,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2676228523254395,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0369983471930027,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002924676425755024,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2045228362083435,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2676228523254395,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0369983471930027,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002924676425755024,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2045228362083435,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2676228523254395,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0369983471930027,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002924676425755024,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2045228362083435,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2676228523254395,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0369983471930027,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002924676425755024,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2045228362083435,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2676228523254395,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0369983471930027,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002924676425755024,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2045228362083435,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2676228523254395,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0369983471930027,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002924676425755024,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3297392189502716,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39750961065292356,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4163591504096985,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032973920553922655,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22207504778154225,
|
|
"calibration/batch_distribution_entropy": 0.9701411645819956,
|
|
"calibration/buffer_distribution_entropy": 0.9441236363940959,
|
|
"calibration/confidence_entropy": 0.4802381286994982,
|
|
"calibration/coverage@0%": 0.013629843240210753,
|
|
"calibration/coverage@1%": 0.013629843240210753,
|
|
"calibration/coverage@10%": 0.2837484478954534,
|
|
"calibration/coverage@15%": 0.3598894524847338,
|
|
"calibration/coverage@20%": 0.5369423730219428,
|
|
"calibration/coverage@25%": 0.6182801199086677,
|
|
"calibration/coverage@30%": 0.6674144997591183,
|
|
"calibration/coverage@5%": 0.2120673432402108,
|
|
"calibration/ece": 0.15988758741196968,
|
|
"calibration/mean_confidence": 0.5410395064628674,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009027777777777768,
|
|
"completions/max_length": 3522.0,
|
|
"completions/max_terminated_length": 3522.0,
|
|
"completions/mean_length": 615.496728515625,
|
|
"completions/mean_terminated_length": 621.16328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 132.8,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 0.0039030034095048904,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.0258,
|
|
"num_tokens": 276008936.0,
|
|
"reward": 0.9833725333213806,
|
|
"reward_std": 0.13102127313613893,
|
|
"rewards/accuracy_reward": 0.6739583253860474,
|
|
"rewards/brier_reward": 0.788122546672821,
|
|
"rewards/confidence_uniqueness_reward": 0.9430952072143555,
|
|
"rewards/format_reward": 0.9907986164093018,
|
|
"rewards/frontier_coverage_0": 0.018168472126126288,
|
|
"rewards/frontier_coverage_1": 0.018168472126126288,
|
|
"rewards/frontier_coverage_10": 0.018168472126126288,
|
|
"rewards/frontier_coverage_15": 0.018168472126126288,
|
|
"rewards/frontier_coverage_20": 0.018168472126126288,
|
|
"rewards/frontier_coverage_25": 0.018168472126126288,
|
|
"rewards/frontier_coverage_5": 0.018168472126126288,
|
|
"rewards/frontier_entropy_batch_reward": -0.2394638776779175,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17725694477558135,
|
|
"signal/accuracy_reward/group_std_mean": 0.22965039312839508,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36388889253139495,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0828640937805176,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08862847238779067,
|
|
"signal/advantage_abs_mean": 0.7517815709114075,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09779231399297714,
|
|
"signal/advantage_pre_scale_std": 0.15307309925556184,
|
|
"signal/advantage_std": 0.983241617679596,
|
|
"signal/brier_reward/centered_abs_mean": 0.15630776584148406,
|
|
"signal/brier_reward/group_std_mean": 0.19986412227153777,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1932190716266632,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015630776807665826,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027252191677689552,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.046789034456014636,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034193987399339675,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027252191212028265,
|
|
"signal/format_reward/centered_abs_mean": 0.016525607742369174,
|
|
"signal/format_reward/group_std_mean": 0.03393084555864334,
|
|
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10422060191631317,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008262803871184587,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2169239789247513,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2821938157081604,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03821746855974197,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031020127702504397,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2169239789247513,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2821938157081604,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03821746855974197,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031020127702504397,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2169239789247513,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2821938157081604,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03821746855974197,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031020127702504397,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2169239789247513,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2821938157081604,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03821746855974197,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031020127702504397,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2169239789247513,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2821938157081604,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03821746855974197,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031020127702504397,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2169239789247513,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2821938157081604,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03821746855974197,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031020127702504397,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2169239789247513,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2821938157081604,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03821746855974197,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031020127702504397,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3079935610294342,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3808625817298889,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3828635513782501,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03079935573041439,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1337878460272878,
|
|
"calibration/batch_distribution_entropy": 0.9491518389855494,
|
|
"calibration/buffer_distribution_entropy": 0.9504134002453654,
|
|
"calibration/confidence_entropy": 0.470818452840835,
|
|
"calibration/coverage@0%": 0.02919666230366492,
|
|
"calibration/coverage@1%": 0.02919666230366492,
|
|
"calibration/coverage@10%": 0.45605468305013697,
|
|
"calibration/coverage@15%": 0.7058141127727731,
|
|
"calibration/coverage@20%": 0.789229159831677,
|
|
"calibration/coverage@25%": 0.8711048675379001,
|
|
"calibration/coverage@30%": 0.9482999031140213,
|
|
"calibration/coverage@5%": 0.19594786212914483,
|
|
"calibration/ece": 0.11502078156694107,
|
|
"calibration/mean_confidence": 0.6127720502496946,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00581597222222221,
|
|
"completions/max_length": 3342.4,
|
|
"completions/max_terminated_length": 3342.4,
|
|
"completions/mean_length": 629.29306640625,
|
|
"completions/mean_terminated_length": 632.9800415039062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 163.2,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 0.004057453945279121,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0122,
|
|
"num_tokens": 286362616.0,
|
|
"reward": 0.9767401456832886,
|
|
"reward_std": 0.12093752324581146,
|
|
"rewards/accuracy_reward": 0.6546006917953491,
|
|
"rewards/brier_reward": 0.808280074596405,
|
|
"rewards/confidence_uniqueness_reward": 0.9443035125732422,
|
|
"rewards/format_reward": 0.9940104126930237,
|
|
"rewards/frontier_coverage_0": 0.045683811977505685,
|
|
"rewards/frontier_coverage_1": 0.045683811977505685,
|
|
"rewards/frontier_coverage_10": 0.045683811977505685,
|
|
"rewards/frontier_coverage_15": 0.045683811977505685,
|
|
"rewards/frontier_coverage_20": 0.045683811977505685,
|
|
"rewards/frontier_coverage_25": 0.045683811977505685,
|
|
"rewards/frontier_coverage_5": 0.045683811977505685,
|
|
"rewards/frontier_entropy_batch_reward": -0.27396737039089203,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15417209565639495,
|
|
"signal/accuracy_reward/group_std_mean": 0.20770005285739898,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9896841287612915,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07708604782819747,
|
|
"signal/advantage_abs_mean": 0.7525755763053894,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08958911299705505,
|
|
"signal/advantage_pre_scale_std": 0.14011250436306,
|
|
"signal/advantage_std": 0.9831971049308776,
|
|
"signal/brier_reward/centered_abs_mean": 0.13868292272090912,
|
|
"signal/brier_reward/group_std_mean": 0.1801248759031296,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17836227416992187,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01386829260736704,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023434021696448325,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04013000652194023,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030173908919095993,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023434022441506386,
|
|
"signal/format_reward/centered_abs_mean": 0.011105685587972402,
|
|
"signal/format_reward/group_std_mean": 0.025266989693045618,
|
|
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07126235738396644,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005552842793986201,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19150737822055816,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.25176058411598207,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0352290228009224,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027385556139051916,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19150737822055816,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.25176058411598207,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0352290228009224,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027385556139051916,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19150737822055816,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25176058411598207,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0352290228009224,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027385556139051916,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19150737822055816,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25176058411598207,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0352290228009224,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027385556139051916,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19150737822055816,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.25176058411598207,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0352290228009224,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027385556139051916,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19150737822055816,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25176058411598207,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0352290228009224,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027385556139051916,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19150737822055816,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.25176058411598207,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0352290228009224,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027385556139051916,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32493494153022767,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39551191329956054,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41893631815910337,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03249349407851696,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18233118348876434,
|
|
"calibration/batch_distribution_entropy": 0.9831680569385256,
|
|
"calibration/buffer_distribution_entropy": 0.9598290512239208,
|
|
"calibration/confidence_entropy": 0.4897688461891715,
|
|
"calibration/coverage@0%": 0.04602953909832603,
|
|
"calibration/coverage@1%": 0.04602953909832603,
|
|
"calibration/coverage@10%": 0.37472474066657796,
|
|
"calibration/coverage@15%": 0.47529519217066457,
|
|
"calibration/coverage@20%": 0.6035010202835199,
|
|
"calibration/coverage@25%": 0.7062519554070845,
|
|
"calibration/coverage@30%": 0.8091139749667903,
|
|
"calibration/coverage@5%": 0.14104869190855002,
|
|
"calibration/ece": 0.16769359034482495,
|
|
"calibration/mean_confidence": 0.5345419757715583,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.005468750000000022,
|
|
"completions/max_length": 3000.6,
|
|
"completions/max_terminated_length": 3000.6,
|
|
"completions/mean_length": 603.790283203125,
|
|
"completions/mean_terminated_length": 607.152490234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 175.6,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 0.004024908412247896,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 296382888.0,
|
|
"reward": 1.0045419931411743,
|
|
"reward_std": 0.10734798014163971,
|
|
"rewards/accuracy_reward": 0.7182291746139526,
|
|
"rewards/brier_reward": 0.797440505027771,
|
|
"rewards/confidence_uniqueness_reward": 0.9454038500785827,
|
|
"rewards/format_reward": 0.9941840171813965,
|
|
"rewards/frontier_coverage_0": -0.007990724965929985,
|
|
"rewards/frontier_coverage_1": -0.007990724965929985,
|
|
"rewards/frontier_coverage_10": -0.007990724965929985,
|
|
"rewards/frontier_coverage_15": -0.007990724965929985,
|
|
"rewards/frontier_coverage_20": -0.007990724965929985,
|
|
"rewards/frontier_coverage_25": -0.0050966314971446994,
|
|
"rewards/frontier_coverage_5": -0.007990724965929985,
|
|
"rewards/frontier_entropy_batch_reward": -0.25190583765506747,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12996961772441865,
|
|
"signal/accuracy_reward/group_std_mean": 0.18141130805015565,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9297018647193909,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06498480886220932,
|
|
"signal/advantage_abs_mean": 0.7460544824600219,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07965542376041412,
|
|
"signal/advantage_pre_scale_std": 0.12871635258197783,
|
|
"signal/advantage_std": 0.9830474495887757,
|
|
"signal/brier_reward/centered_abs_mean": 0.1287109524011612,
|
|
"signal/brier_reward/group_std_mean": 0.16658840775489808,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18521082699298858,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012871095538139343,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02220791019499302,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.033020298555493356,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031845220178365705,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022207909962162374,
|
|
"signal/format_reward/centered_abs_mean": 0.009825303871184587,
|
|
"signal/format_reward/group_std_mean": 0.017753782123327254,
|
|
"signal/format_reward/group_zero_std_frac": 0.9277778029441833,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06998921409249306,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004912651935592293,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17401364147663118,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23170343041419983,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035727670043706895,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024883949663490057,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17401364147663118,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23170343041419983,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035727670043706895,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024883949663490057,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17401364147663118,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23170343041419983,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.035727670043706895,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024883949663490057,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17401364147663118,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23170343041419983,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.035727670043706895,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024883949663490057,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17401364147663118,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23170343041419983,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035727670043706895,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024883949663490057,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15727486312389374,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20966436564922333,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03222865499556064,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022490305360406636,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17401364147663118,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23170343041419983,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035727670043706895,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024883949663490057,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30758561491966246,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3782775580883026,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44249006509780886,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030758562684059142,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19084036200929547,
|
|
"calibration/batch_distribution_entropy": 0.9682289496498896,
|
|
"calibration/buffer_distribution_entropy": 0.9670171964368471,
|
|
"calibration/confidence_entropy": 0.5042296552565959,
|
|
"calibration/coverage@0%": 0.06598648652575967,
|
|
"calibration/coverage@1%": 0.0748870100859691,
|
|
"calibration/coverage@10%": 0.36375211584191663,
|
|
"calibration/coverage@15%": 0.4656610314172543,
|
|
"calibration/coverage@20%": 0.5612869886858138,
|
|
"calibration/coverage@25%": 0.6531385987815492,
|
|
"calibration/coverage@30%": 0.7883200065274152,
|
|
"calibration/coverage@5%": 0.29452257920003927,
|
|
"calibration/ece": 0.18335817487450937,
|
|
"calibration/mean_confidence": 0.5571029847794488,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.004774305555555558,
|
|
"completions/max_length": 3239.2,
|
|
"completions/max_terminated_length": 3239.2,
|
|
"completions/mean_length": 701.5655395507813,
|
|
"completions/mean_terminated_length": 704.9570678710937,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 189.6,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 0.0036906444001942873,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.0084,
|
|
"num_tokens": 307575259.0,
|
|
"reward": 0.9870135068893433,
|
|
"reward_std": 0.12090798169374466,
|
|
"rewards/accuracy_reward": 0.6818576455116272,
|
|
"rewards/brier_reward": 0.8064930081367493,
|
|
"rewards/confidence_uniqueness_reward": 0.9434916973114014,
|
|
"rewards/format_reward": 0.9942708134651184,
|
|
"rewards/frontier_coverage_0": 0.02103922632522881,
|
|
"rewards/frontier_coverage_1": 0.02103922632522881,
|
|
"rewards/frontier_coverage_10": 0.02103922632522881,
|
|
"rewards/frontier_coverage_15": 0.02103922632522881,
|
|
"rewards/frontier_coverage_20": 0.021864201012067496,
|
|
"rewards/frontier_coverage_25": 0.04642558991909027,
|
|
"rewards/frontier_coverage_5": 0.02103922632522881,
|
|
"rewards/frontier_entropy_batch_reward": -0.28530060350894926,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1652289465069771,
|
|
"signal/accuracy_reward/group_std_mean": 0.2149661064147949,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0628995418548584,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08261447325348854,
|
|
"signal/advantage_abs_mean": 0.768857729434967,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09277653992176056,
|
|
"signal/advantage_pre_scale_std": 0.141487255692482,
|
|
"signal/advantage_std": 0.9831639409065247,
|
|
"signal/brier_reward/centered_abs_mean": 0.13207932710647582,
|
|
"signal/brier_reward/group_std_mean": 0.16995641589164734,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17336148023605347,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013207933306694031,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02226933278143406,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0336017731577158,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029386086389422417,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002226933324709535,
|
|
"signal/format_reward/centered_abs_mean": 0.008897569379769266,
|
|
"signal/format_reward/group_std_mean": 0.017280596494674682,
|
|
"signal/format_reward/group_zero_std_frac": 0.925000011920929,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05693276599049568,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004448784689884633,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18552227616310119,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24258872568607331,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03458261713385582,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002652968605980277,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18552227616310119,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24258872568607331,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03458261713385582,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002652968605980277,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18552227616310119,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24258872568607331,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03458261713385582,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002652968605980277,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18552227616310119,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24258872568607331,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03458261713385582,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002652968605980277,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17669001817703248,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23156578838825226,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03303196430206299,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002526667295023799,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06840592995285988,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08922984004020691,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01295476108789444,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009782047942280768,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18552227616310119,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24258872568607331,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03458261713385582,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002652968605980277,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32859750390052794,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3984165847301483,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4377071440219879,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03285974971950054,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 0.13702523864879065,
|
|
"eval_calibration/batch_distribution_entropy": 0.9284841274227499,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9708200938328989,
|
|
"eval_calibration/confidence_entropy": 0.4971687058154246,
|
|
"eval_calibration/coverage@0%": 0.26293682795698925,
|
|
"eval_calibration/coverage@1%": 0.26293682795698925,
|
|
"eval_calibration/coverage@10%": 0.5304099462365591,
|
|
"eval_calibration/coverage@15%": 0.6609543010752689,
|
|
"eval_calibration/coverage@20%": 0.7239583333333334,
|
|
"eval_calibration/coverage@25%": 0.8385416666666666,
|
|
"eval_calibration/coverage@30%": 0.9322916666666666,
|
|
"eval_calibration/coverage@5%": 0.28897849462365593,
|
|
"eval_calibration/ece": 0.1888974403920867,
|
|
"eval_calibration/mean_confidence": 0.5458924104876176,
|
|
"eval_completions/clipped_ratio": 0.0026041666666666665,
|
|
"eval_completions/max_length": 2327.3333333333335,
|
|
"eval_completions/max_terminated_length": 2327.3333333333335,
|
|
"eval_completions/mean_length": 692.9316202799479,
|
|
"eval_completions/mean_terminated_length": 694.7150370279948,
|
|
"eval_completions/min_length": 158.5,
|
|
"eval_completions/min_terminated_length": 230.16666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 307575259.0,
|
|
"eval_reward": 0.9107913474241892,
|
|
"eval_reward_std": 0.2171098291873932,
|
|
"eval_rewards/accuracy_reward": 0.6788194378217062,
|
|
"eval_rewards/brier_reward": 0.8023928701877594,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8938767115275065,
|
|
"eval_rewards/format_reward": 0.9973958333333334,
|
|
"eval_rewards/frontier_coverage_0": 0.02134424013396104,
|
|
"eval_rewards/frontier_coverage_1": 0.02134424013396104,
|
|
"eval_rewards/frontier_coverage_10": 0.02134424013396104,
|
|
"eval_rewards/frontier_coverage_15": 0.02134424013396104,
|
|
"eval_rewards/frontier_coverage_20": 0.02452502477293213,
|
|
"eval_rewards/frontier_coverage_25": 0.0642988532781601,
|
|
"eval_rewards/frontier_coverage_5": 0.02134424013396104,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9973958333333334,
|
|
"eval_runtime": 139.2104,
|
|
"eval_samples_per_second": 7.183,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4271918435891469,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4689544787009557,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9881373941898346,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21359592179457346,
|
|
"eval_signal/advantage_abs_mean": 0.8833253582318624,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19192364563544592,
|
|
"eval_signal/advantage_pre_scale_std": 0.21469872941573462,
|
|
"eval_signal/advantage_std": 0.986367384592692,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.18057803561290106,
|
|
"eval_signal/brier_reward/group_std_mean": 0.23535025119781494,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08358132963379224,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018057803623378277,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0436480101197958,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05746622569859028,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020154597237706184,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0043648009886965156,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376,
|
|
"eval_signal/format_reward/group_std_mean": 0.014731391333043575,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9166666766007742,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011093226571877798,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.29528985420862836,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4035186717907588,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019564516842365265,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004222644803424676,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.29528985420862836,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4035186717907588,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019564516842365265,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004222644803424676,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.29528985420862836,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4035186717907588,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019564516842365265,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004222644803424676,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.29528985420862836,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4035186717907588,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019564516842365265,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004222644803424676,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.23646595080693564,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.32997279862562817,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015677123485753935,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003381463116966188,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09063521524270375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.11626108984152476,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006016027880832553,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001296083559282124,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.29528985420862836,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4035186717907588,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019564516842365265,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004222644803424676,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666766007742,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0022186453764637313,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045573343522847,
|
|
"eval_steps_per_second": 0.043,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14161491473574045,
|
|
"calibration/batch_distribution_entropy": 0.9785979939228288,
|
|
"calibration/buffer_distribution_entropy": 0.9725325260012785,
|
|
"calibration/confidence_entropy": 0.5026786576895561,
|
|
"calibration/coverage@0%": 0.030372153493650255,
|
|
"calibration/coverage@1%": 0.030372153493650255,
|
|
"calibration/coverage@10%": 0.48445113108853616,
|
|
"calibration/coverage@15%": 0.6141158725686856,
|
|
"calibration/coverage@20%": 0.7234010943737882,
|
|
"calibration/coverage@25%": 0.850517394479151,
|
|
"calibration/coverage@30%": 0.8960138340191106,
|
|
"calibration/coverage@5%": 0.2579811082970747,
|
|
"calibration/ece": 0.1825885505288956,
|
|
"calibration/mean_confidence": 0.5590493941529713,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.003993055555555558,
|
|
"completions/max_length": 3360.2,
|
|
"completions/max_terminated_length": 3360.2,
|
|
"completions/mean_length": 680.30234375,
|
|
"completions/mean_terminated_length": 683.0581176757812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 207.4,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.003881107782945037,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0034,
|
|
"num_tokens": 318520054.0,
|
|
"reward": 1.0143356561660766,
|
|
"reward_std": 0.11488137692213059,
|
|
"rewards/accuracy_reward": 0.7287326455116272,
|
|
"rewards/brier_reward": 0.8206124186515809,
|
|
"rewards/confidence_uniqueness_reward": 0.9463862299919128,
|
|
"rewards/format_reward": 0.9959201574325561,
|
|
"rewards/frontier_coverage_0": 0.008624611730920152,
|
|
"rewards/frontier_coverage_1": 0.008624611730920152,
|
|
"rewards/frontier_coverage_10": 0.008623575296951458,
|
|
"rewards/frontier_coverage_15": 0.008593602268956602,
|
|
"rewards/frontier_coverage_20": 0.019597085565328597,
|
|
"rewards/frontier_coverage_25": 0.09769158065319061,
|
|
"rewards/frontier_coverage_5": 0.008624611730920152,
|
|
"rewards/frontier_entropy_batch_reward": -0.26984030604362486,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15015733242034912,
|
|
"signal/accuracy_reward/group_std_mean": 0.20241186618804932,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0236715793609619,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07507866621017456,
|
|
"signal/advantage_abs_mean": 0.7554642915725708,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0852625235915184,
|
|
"signal/advantage_pre_scale_std": 0.13472781628370284,
|
|
"signal/advantage_std": 0.9831080079078675,
|
|
"signal/brier_reward/centered_abs_mean": 0.1314813494682312,
|
|
"signal/brier_reward/group_std_mean": 0.16985029578208924,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18111539185047149,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013148135691881179,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020419245585799217,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03257020190358162,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02790914885699749,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020419245585799215,
|
|
"signal/format_reward/centered_abs_mean": 0.007546658022329211,
|
|
"signal/format_reward/group_std_mean": 0.016791296564042567,
|
|
"signal/format_reward/group_zero_std_frac": 0.9222222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05060187578201294,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0037733290111646054,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19345370233058928,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2506356716156006,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03803465738892555,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027663879096508025,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19345370233058928,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2506356716156006,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03803465738892555,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027663879096508025,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19344922304153442,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2506299793720245,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03803384155035019,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002766323834657669,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19292726516723632,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2499801516532898,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03793646469712257,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027588598895817995,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1353215456008911,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17713548839092255,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02666233666241169,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001935098133981228,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08023149967193603,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10117035806179046,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015790591202676296,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001147310435771942,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19345370233058928,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2506356716156006,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03803465738892555,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027663879096508025,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3274266362190247,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39751541018486025,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4508472442626953,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03274266496300697,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.09399491959647298,
|
|
"calibration/batch_distribution_entropy": 0.9412546711952994,
|
|
"calibration/buffer_distribution_entropy": 0.9754604476125126,
|
|
"calibration/confidence_entropy": 0.4770566756690894,
|
|
"calibration/coverage@0%": 0.09393830903780045,
|
|
"calibration/coverage@1%": 0.09393830903780045,
|
|
"calibration/coverage@10%": 0.7389721201296986,
|
|
"calibration/coverage@15%": 0.843777086332959,
|
|
"calibration/coverage@20%": 0.880931680869679,
|
|
"calibration/coverage@25%": 0.9050462602561364,
|
|
"calibration/coverage@30%": 0.9182058047493402,
|
|
"calibration/coverage@5%": 0.5374245034399536,
|
|
"calibration/ece": 0.16942426673168062,
|
|
"calibration/mean_confidence": 0.6137393061839143,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006770833333333326,
|
|
"completions/max_length": 3504.0,
|
|
"completions/max_terminated_length": 3504.0,
|
|
"completions/mean_length": 693.0671875,
|
|
"completions/mean_terminated_length": 697.8524291992187,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 187.6,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 0.003740179119631648,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0148,
|
|
"num_tokens": 329591484.0,
|
|
"reward": 0.9867475509643555,
|
|
"reward_std": 0.1178449347615242,
|
|
"rewards/accuracy_reward": 0.6817708253860474,
|
|
"rewards/brier_reward": 0.8016348242759704,
|
|
"rewards/confidence_uniqueness_reward": 0.9426298260688781,
|
|
"rewards/format_reward": 0.9927083253860474,
|
|
"rewards/frontier_coverage_0": 0.02299555651843548,
|
|
"rewards/frontier_coverage_1": 0.02299555651843548,
|
|
"rewards/frontier_coverage_10": 0.02299380600452423,
|
|
"rewards/frontier_coverage_15": 0.022937557473778725,
|
|
"rewards/frontier_coverage_20": 0.027411183714866637,
|
|
"rewards/frontier_coverage_25": 0.10199409276247025,
|
|
"rewards/frontier_coverage_5": 0.02299555651843548,
|
|
"rewards/frontier_entropy_batch_reward": -0.28412319123744967,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14393446147441863,
|
|
"signal/accuracy_reward/group_std_mean": 0.1882144957780838,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.47222222089767457,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0052073359489442,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07196723073720931,
|
|
"signal/advantage_abs_mean": 0.7603908181190491,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08789721131324768,
|
|
"signal/advantage_pre_scale_std": 0.14016545712947845,
|
|
"signal/advantage_std": 0.9830867886543274,
|
|
"signal/brier_reward/centered_abs_mean": 0.13717943131923677,
|
|
"signal/brier_reward/group_std_mean": 0.17664145231246947,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19173648059368134,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013717942871153355,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025140639021992685,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.041734833270311356,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034909750893712045,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025140638928860424,
|
|
"signal/format_reward/centered_abs_mean": 0.012847222201526166,
|
|
"signal/format_reward/group_std_mean": 0.026805402338504793,
|
|
"signal/format_reward/group_zero_std_frac": 0.8805555701255798,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08747010976076126,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006423611100763083,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1893948495388031,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24652081727981567,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037821638584136966,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002708346350118518,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1893948495388031,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24652081727981567,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037821638584136966,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002708346350118518,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18938452005386353,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2465077221393585,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.037819582223892215,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002708198828622699,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18859367370605468,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24550087451934816,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03766307979822159,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026968895457684995,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1072016030550003,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.139630264043808,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02150789238512516,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015329829417169093,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08940613269805908,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11217249184846878,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017939681187272072,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012785077095031738,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1893948495388031,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24652081727981567,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037821638584136966,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002708346350118518,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32551802396774293,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39456554055213927,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45712563395500183,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03255180567502976,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12667895964624948,
|
|
"calibration/batch_distribution_entropy": 0.9667500631719177,
|
|
"calibration/buffer_distribution_entropy": 0.9768246962707771,
|
|
"calibration/confidence_entropy": 0.4787488938658747,
|
|
"calibration/coverage@0%": 0.08587612683228549,
|
|
"calibration/coverage@1%": 0.08587612683228549,
|
|
"calibration/coverage@10%": 0.5596784657665852,
|
|
"calibration/coverage@15%": 0.658714518716747,
|
|
"calibration/coverage@20%": 0.7396279283724964,
|
|
"calibration/coverage@25%": 0.8107754068022667,
|
|
"calibration/coverage@30%": 0.8813963204972873,
|
|
"calibration/coverage@5%": 0.3803046427900564,
|
|
"calibration/ece": 0.20473769314593335,
|
|
"calibration/mean_confidence": 0.5073165586899123,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009895833333333348,
|
|
"completions/max_length": 3408.6,
|
|
"completions/max_terminated_length": 3408.6,
|
|
"completions/mean_length": 766.8527099609375,
|
|
"completions/mean_terminated_length": 774.5172241210937,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 247.6,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 0.0031843557953834534,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.0273,
|
|
"num_tokens": 341564699.0,
|
|
"reward": 0.9761411070823669,
|
|
"reward_std": 0.11881034970283508,
|
|
"rewards/accuracy_reward": 0.6574652791023254,
|
|
"rewards/brier_reward": 0.7964750170707703,
|
|
"rewards/confidence_uniqueness_reward": 0.9407760500907898,
|
|
"rewards/format_reward": 0.9895833253860473,
|
|
"rewards/frontier_coverage_0": 0.04257221892476082,
|
|
"rewards/frontier_coverage_1": 0.04257221892476082,
|
|
"rewards/frontier_coverage_10": 0.04257510676980018,
|
|
"rewards/frontier_coverage_15": 0.04277926944196224,
|
|
"rewards/frontier_coverage_20": 0.046192364767193794,
|
|
"rewards/frontier_coverage_25": 0.10422454476356506,
|
|
"rewards/frontier_coverage_5": 0.04257221892476082,
|
|
"rewards/frontier_entropy_batch_reward": -0.26306197941303255,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1366536423563957,
|
|
"signal/accuracy_reward/group_std_mean": 0.18269501626491547,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9548314452171326,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06832682117819786,
|
|
"signal/advantage_abs_mean": 0.7397696733474731,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08615766167640686,
|
|
"signal/advantage_pre_scale_std": 0.1432511866092682,
|
|
"signal/advantage_std": 0.9830728769302368,
|
|
"signal/brier_reward/centered_abs_mean": 0.13853301703929902,
|
|
"signal/brier_reward/group_std_mean": 0.17815548181533813,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1958606421947479,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013853302784264087,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02927153408527374,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05202648937702179,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04150531962513924,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002927153604105115,
|
|
"signal/format_reward/centered_abs_mean": 0.01867404468357563,
|
|
"signal/format_reward/group_std_mean": 0.03949138410389423,
|
|
"signal/format_reward/group_zero_std_frac": 0.8222222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1326592281460762,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009337022341787815,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20547735095024108,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2617917537689209,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041419435292482376,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002938326168805361,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20547735095024108,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2617917537689209,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041419435292482376,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002938326168805361,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20546633899211883,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2617780089378357,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04141724780201912,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029381686355918644,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2041507601737976,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2601293295621872,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04115338325500488,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029193558264523746,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09043723046779632,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11598165482282638,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018281865678727627,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012932523852214218,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0844599574804306,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10744838416576385,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017018306627869607,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012077773921191693,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20547735095024108,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2617917537689209,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041419435292482376,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002938326168805361,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32049464583396914,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39367471933364867,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4526883363723755,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03204946555197239,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12042923465880181,
|
|
"calibration/batch_distribution_entropy": 0.9231336191494597,
|
|
"calibration/buffer_distribution_entropy": 0.9772784581546675,
|
|
"calibration/confidence_entropy": 0.4722359481486856,
|
|
"calibration/coverage@0%": 0.02619224996737286,
|
|
"calibration/coverage@1%": 0.02619224996737286,
|
|
"calibration/coverage@10%": 0.5754376567915199,
|
|
"calibration/coverage@15%": 0.6780955222879599,
|
|
"calibration/coverage@20%": 0.8664804654079841,
|
|
"calibration/coverage@25%": 0.9255724249938371,
|
|
"calibration/coverage@30%": 0.9763779527559056,
|
|
"calibration/coverage@5%": 0.21658759117472193,
|
|
"calibration/ece": 0.11244536955518544,
|
|
"calibration/mean_confidence": 0.6432379778464519,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010677083333333327,
|
|
"completions/max_length": 3379.8,
|
|
"completions/max_terminated_length": 3379.8,
|
|
"completions/mean_length": 764.9436767578125,
|
|
"completions/mean_terminated_length": 773.263427734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 232.4,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 0.0034331590868532658,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0318,
|
|
"num_tokens": 353466034.0,
|
|
"reward": 1.001962959766388,
|
|
"reward_std": 0.12031054049730301,
|
|
"rewards/accuracy_reward": 0.7228298544883728,
|
|
"rewards/brier_reward": 0.8268496513366699,
|
|
"rewards/confidence_uniqueness_reward": 0.9339795589447022,
|
|
"rewards/format_reward": 0.9889756798744201,
|
|
"rewards/frontier_coverage_0": 0.018969600554555655,
|
|
"rewards/frontier_coverage_1": 0.018969600554555655,
|
|
"rewards/frontier_coverage_10": 0.01897885270882398,
|
|
"rewards/frontier_coverage_15": 0.02006477633258328,
|
|
"rewards/frontier_coverage_20": 0.05282620638608933,
|
|
"rewards/frontier_coverage_25": 0.15781393945217131,
|
|
"rewards/frontier_coverage_5": 0.018969600554555655,
|
|
"rewards/frontier_entropy_batch_reward": -0.34407015442848204,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13177625983953475,
|
|
"signal/accuracy_reward/group_std_mean": 0.1782878965139389,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.957055127620697,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06588812991976738,
|
|
"signal/advantage_abs_mean": 0.7518744587898254,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08772747218608856,
|
|
"signal/advantage_pre_scale_std": 0.14746004790067674,
|
|
"signal/advantage_std": 0.9830225229263305,
|
|
"signal/brier_reward/centered_abs_mean": 0.12697158604860306,
|
|
"signal/brier_reward/group_std_mean": 0.16580133736133576,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18501022160053254,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012697158567607402,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03250643089413643,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.054881346970796586,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0470227912068367,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003250643378123641,
|
|
"signal/format_reward/centered_abs_mean": 0.01877712644636631,
|
|
"signal/format_reward/group_std_mean": 0.03860697820782662,
|
|
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.13337477520108224,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009388563223183155,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1576144963502884,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21049812138080598,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03282146006822586,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022538872435688972,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1576144963502884,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21049812138080598,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03282146006822586,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022538872435688972,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15755284130573272,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2104198604822159,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03280867114663124,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022530056070536376,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1547604590654373,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20678509175777435,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03222393654286861,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002213074592873454,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06509168595075607,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08332156985998154,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01360565610229969,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009308110922574997,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10484070777893066,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13356612026691436,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02198589891195297,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014992221491411327,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1576144963502884,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21049812138080598,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03282146006822586,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022538872435688972,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34670414328575133,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41445213556289673,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5086399018764496,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03467041626572609,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.09662731950335442,
|
|
"calibration/batch_distribution_entropy": 0.967024441243046,
|
|
"calibration/buffer_distribution_entropy": 0.9772981503148188,
|
|
"calibration/confidence_entropy": 0.5046960184786891,
|
|
"calibration/coverage@0%": 0.08649086696074063,
|
|
"calibration/coverage@1%": 0.1979207803453709,
|
|
"calibration/coverage@10%": 0.6744557656695441,
|
|
"calibration/coverage@15%": 0.7621469822036061,
|
|
"calibration/coverage@20%": 0.850881153068473,
|
|
"calibration/coverage@25%": 0.9165939939987974,
|
|
"calibration/coverage@30%": 0.9591402590245247,
|
|
"calibration/coverage@5%": 0.3844438926821537,
|
|
"calibration/ece": 0.16998348481003883,
|
|
"calibration/mean_confidence": 0.5691553376825024,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009201388888888907,
|
|
"completions/max_length": 3807.2,
|
|
"completions/max_terminated_length": 3807.2,
|
|
"completions/mean_length": 819.2960205078125,
|
|
"completions/mean_terminated_length": 826.9532104492188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 258.4,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 0.0033503889571875334,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.0224,
|
|
"num_tokens": 366012292.0,
|
|
"reward": 1.0019341111183167,
|
|
"reward_std": 0.12585299015045165,
|
|
"rewards/accuracy_reward": 0.7077257037162781,
|
|
"rewards/brier_reward": 0.8151641726493836,
|
|
"rewards/confidence_uniqueness_reward": 0.9411271095275879,
|
|
"rewards/format_reward": 0.9901041746139526,
|
|
"rewards/frontier_coverage_0": 0.014120917581021786,
|
|
"rewards/frontier_coverage_1": 0.014120917581021786,
|
|
"rewards/frontier_coverage_10": 0.014142588526010514,
|
|
"rewards/frontier_coverage_15": 0.014934336580336095,
|
|
"rewards/frontier_coverage_20": 0.053513363003730774,
|
|
"rewards/frontier_coverage_25": 0.1392007663846016,
|
|
"rewards/frontier_coverage_5": 0.014121649414300918,
|
|
"rewards/frontier_entropy_batch_reward": -0.26387418806552887,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15167643427848815,
|
|
"signal/accuracy_reward/group_std_mean": 0.2053221881389618,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.008397400379181,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07583821713924407,
|
|
"signal/advantage_abs_mean": 0.7449156880378723,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09188491851091385,
|
|
"signal/advantage_pre_scale_std": 0.1496666193008423,
|
|
"signal/advantage_std": 0.9831571817398072,
|
|
"signal/brier_reward/centered_abs_mean": 0.12859825491905214,
|
|
"signal/brier_reward/group_std_mean": 0.16735298037528992,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1708065688610077,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012859826162457465,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02758704237639904,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.045367203652858734,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03653429411351681,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027587041724473236,
|
|
"signal/format_reward/centered_abs_mean": 0.016514757089316844,
|
|
"signal/format_reward/group_std_mean": 0.032019348442554475,
|
|
"signal/format_reward/group_zero_std_frac": 0.8638888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10936851501464843,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008257378544658422,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18040508925914764,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23544572591781615,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03435261063277721,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025797927286475898,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18040508925914764,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23544572591781615,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03435261063277721,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025797927286475898,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18032192587852477,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.235341677069664,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03433669619262218,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025786036625504495,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1752968579530716,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22891083657741546,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.033378247171640396,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002506745047867298,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06402314454317093,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08175744861364365,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012168211303651333,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009155309875495732,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09939492493867874,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12737924307584764,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018856792896986007,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001421347470022738,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18040342926979064,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23544372022151946,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034352288022637366,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002579768933355808,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3185386657714844,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3876194655895233,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42217653393745425,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0318538673222065,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.08526290527356786,
|
|
"calibration/batch_distribution_entropy": 0.9492263091629465,
|
|
"calibration/buffer_distribution_entropy": 0.9778171307399122,
|
|
"calibration/confidence_entropy": 0.49997622614018694,
|
|
"calibration/coverage@0%": 0.05113034229386461,
|
|
"calibration/coverage@1%": 0.09365002733323467,
|
|
"calibration/coverage@10%": 0.7473668466249658,
|
|
"calibration/coverage@15%": 0.8666818025240637,
|
|
"calibration/coverage@20%": 0.9463922907613297,
|
|
"calibration/coverage@25%": 0.9740053050397878,
|
|
"calibration/coverage@30%": 0.9946949602122016,
|
|
"calibration/coverage@5%": 0.3784139398978424,
|
|
"calibration/ece": 0.166516695461729,
|
|
"calibration/mean_confidence": 0.6187747295165235,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009982638888888862,
|
|
"completions/max_length": 3491.4,
|
|
"completions/max_terminated_length": 3491.4,
|
|
"completions/mean_length": 794.2389770507813,
|
|
"completions/mean_terminated_length": 802.2543701171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 254.2,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 0.003332644235342741,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.0247,
|
|
"num_tokens": 378261893.0,
|
|
"reward": 1.0071517705917359,
|
|
"reward_std": 0.12117233127355576,
|
|
"rewards/accuracy_reward": 0.725781238079071,
|
|
"rewards/brier_reward": 0.8080682635307312,
|
|
"rewards/confidence_uniqueness_reward": 0.938874113559723,
|
|
"rewards/format_reward": 0.9896701335906982,
|
|
"rewards/frontier_coverage_0": -0.003929438255727291,
|
|
"rewards/frontier_coverage_1": -0.003929438255727291,
|
|
"rewards/frontier_coverage_10": -0.0038942765444517136,
|
|
"rewards/frontier_coverage_15": -0.004395973100326955,
|
|
"rewards/frontier_coverage_20": 0.05808563455939293,
|
|
"rewards/frontier_coverage_25": 0.14676995277404786,
|
|
"rewards/frontier_coverage_5": -0.003927422594279051,
|
|
"rewards/frontier_entropy_batch_reward": -0.27910477519035337,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1342068150639534,
|
|
"signal/accuracy_reward/group_std_mean": 0.17787945568561553,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4888888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9742860913276672,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0671034075319767,
|
|
"signal/advantage_abs_mean": 0.7551454067230224,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08928989768028259,
|
|
"signal/advantage_pre_scale_std": 0.14911575615406036,
|
|
"signal/advantage_std": 0.9830282688140869,
|
|
"signal/brier_reward/centered_abs_mean": 0.1267807200551033,
|
|
"signal/brier_reward/group_std_mean": 0.16448463797569274,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18453309237957,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012678072415292263,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029533155634999274,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.050531229376792906,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04347648099064827,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029533156659454106,
|
|
"signal/format_reward/centered_abs_mean": 0.018115234375,
|
|
"signal/format_reward/group_std_mean": 0.036967866495251654,
|
|
"signal/format_reward/group_zero_std_frac": 0.8361111283302307,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.13411470055580138,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0090576171875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1632930189371109,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21255632638931274,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03398923799395561,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023350901901721954,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1632930189371109,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21255632638931274,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03398923799395561,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023350901901721954,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1632182240486145,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21246310472488403,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03397372327744961,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023340205661952496,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15129518806934356,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1972308337688446,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03146095797419548,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021635211771354078,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0666133850812912,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08397864252328872,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01388755403459072,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009525713743641972,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10736925154924393,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13568062484264373,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022355619445443155,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015353802824392914,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16328986287117003,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21255233883857727,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033988584950566295,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335045067593455,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.321364963054657,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38714643120765685,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4681834578514099,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03213649578392506,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.147813420478697,
|
|
"calibration/batch_distribution_entropy": 0.9615515889414834,
|
|
"calibration/buffer_distribution_entropy": 0.9768709358020142,
|
|
"calibration/confidence_entropy": 0.48301767115068583,
|
|
"calibration/coverage@0%": 0.021637299525751035,
|
|
"calibration/coverage@1%": 0.021637299525751035,
|
|
"calibration/coverage@10%": 0.29680602288229385,
|
|
"calibration/coverage@15%": 0.7420242355127942,
|
|
"calibration/coverage@20%": 0.8569570879698016,
|
|
"calibration/coverage@25%": 0.938528549490437,
|
|
"calibration/coverage@30%": 0.9830238726790451,
|
|
"calibration/coverage@5%": 0.04941376570265051,
|
|
"calibration/ece": 0.18628405567119197,
|
|
"calibration/mean_confidence": 0.5851708761244938,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010503472222222232,
|
|
"completions/max_length": 3336.4,
|
|
"completions/max_terminated_length": 3336.4,
|
|
"completions/mean_length": 798.7246704101562,
|
|
"completions/mean_terminated_length": 807.3341674804688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 219.0,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.0037687718868255615,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.026,
|
|
"num_tokens": 390553249.0,
|
|
"reward": 0.9922348737716675,
|
|
"reward_std": 0.1255135342478752,
|
|
"rewards/accuracy_reward": 0.6885416626930236,
|
|
"rewards/brier_reward": 0.8148928165435791,
|
|
"rewards/confidence_uniqueness_reward": 0.939547860622406,
|
|
"rewards/format_reward": 0.9893229126930236,
|
|
"rewards/frontier_coverage_0": 0.02959106657654047,
|
|
"rewards/frontier_coverage_1": 0.02959106657654047,
|
|
"rewards/frontier_coverage_10": 0.029620955046266318,
|
|
"rewards/frontier_coverage_15": 0.030684778385329993,
|
|
"rewards/frontier_coverage_20": 0.07125861793756486,
|
|
"rewards/frontier_coverage_25": 0.15161574482917786,
|
|
"rewards/frontier_coverage_5": 0.029591639526188374,
|
|
"rewards/frontier_entropy_batch_reward": -0.2746046096086502,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15325520634651185,
|
|
"signal/accuracy_reward/group_std_mean": 0.20086986124515532,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.050264871120453,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07662760317325593,
|
|
"signal/advantage_abs_mean": 0.7525103449821472,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09348735362291336,
|
|
"signal/advantage_pre_scale_std": 0.15054976642131807,
|
|
"signal/advantage_std": 0.9831097364425659,
|
|
"signal/brier_reward/centered_abs_mean": 0.13104279041290284,
|
|
"signal/brier_reward/group_std_mean": 0.16971164345741271,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18007910549640654,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013104279339313508,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02803487591445446,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04839293137192726,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.038316420093178746,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028034877497702835,
|
|
"signal/format_reward/centered_abs_mean": 0.01681315116584301,
|
|
"signal/format_reward/group_std_mean": 0.03504555374383926,
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1139563001692295,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008406575582921506,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1829205185174942,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23725357055664062,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035879862308502194,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026157634798437356,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1829205185174942,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23725357055664062,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035879862308502194,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026157634798437356,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18279743790626526,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23710041046142577,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03585578799247742,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026140033267438413,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16599198877811433,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2160373091697693,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03256378434598446,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023736854549497367,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07057978808879853,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08835373222827911,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013883821666240692,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010092909797094762,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10900045037269593,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13839271068572997,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02145172506570816,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015587064437568188,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18291856944561005,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23725113570690154,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03587948232889175,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002615735540166497,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3228137791156769,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39099804162979124,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44452112913131714,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032281379029154775,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10794396304820482,
|
|
"calibration/batch_distribution_entropy": 0.9318776316967142,
|
|
"calibration/buffer_distribution_entropy": 0.9766401009144714,
|
|
"calibration/confidence_entropy": 0.48874635286794427,
|
|
"calibration/coverage@0%": 0.02941019738198198,
|
|
"calibration/coverage@1%": 0.02941019738198198,
|
|
"calibration/coverage@10%": 0.566576001128133,
|
|
"calibration/coverage@15%": 0.7814061012715239,
|
|
"calibration/coverage@20%": 0.9241913307868799,
|
|
"calibration/coverage@25%": 0.9963446475195823,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.39669069301030124,
|
|
"calibration/ece": 0.17917218190945836,
|
|
"calibration/mean_confidence": 0.6358024523294838,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006770833333333326,
|
|
"completions/max_length": 3528.6,
|
|
"completions/max_terminated_length": 3528.6,
|
|
"completions/mean_length": 782.6207641601562,
|
|
"completions/mean_terminated_length": 787.9698608398437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 245.4,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.004032325465232134,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0193,
|
|
"num_tokens": 402651984.0,
|
|
"reward": 1.0132048964500426,
|
|
"reward_std": 0.11641700565814972,
|
|
"rewards/accuracy_reward": 0.7356770753860473,
|
|
"rewards/brier_reward": 0.8206545829772949,
|
|
"rewards/confidence_uniqueness_reward": 0.9407994031906128,
|
|
"rewards/format_reward": 0.9932291626930236,
|
|
"rewards/frontier_coverage_0": 0.005415836116299033,
|
|
"rewards/frontier_coverage_1": 0.005415836116299033,
|
|
"rewards/frontier_coverage_10": 0.005474161216989159,
|
|
"rewards/frontier_coverage_15": 0.009392570797353983,
|
|
"rewards/frontier_coverage_20": 0.0829792320728302,
|
|
"rewards/frontier_coverage_25": 0.17440233528614044,
|
|
"rewards/frontier_coverage_5": 0.005415877094492316,
|
|
"rewards/frontier_entropy_batch_reward": -0.31519128680229186,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14232313483953477,
|
|
"signal/accuracy_reward/group_std_mean": 0.18668197989463806,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4694444537162781,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0415077209472656,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07116156741976738,
|
|
"signal/advantage_abs_mean": 0.7595191597938538,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08674072474241257,
|
|
"signal/advantage_pre_scale_std": 0.14214332550764083,
|
|
"signal/advantage_std": 0.9830122709274292,
|
|
"signal/brier_reward/centered_abs_mean": 0.12559207975864412,
|
|
"signal/brier_reward/group_std_mean": 0.16448963582515716,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18423607349395751,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01255920883268118,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025163330510258673,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0417561799287796,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0369965672492981,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002516333060339093,
|
|
"signal/format_reward/centered_abs_mean": 0.0123046875,
|
|
"signal/format_reward/group_std_mean": 0.0261313796043396,
|
|
"signal/format_reward/group_zero_std_frac": 0.8833333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08963212668895722,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00615234375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17310574054718017,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22561688125133514,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036221811175346376,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024754120968282223,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17310574054718017,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22561688125133514,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036221811175346376,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024754120968282223,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17299672365188598,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22547802329063416,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.036198879778385165,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024738531094044445,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15460915565490724,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20132783949375152,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03233877532184124,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022109109442681072,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07516934722661972,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09350252896547318,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015893686562776566,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010749216424301266,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11685722768306732,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14719865024089812,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024771924316883086,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016710583819076418,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1731052041053772,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22561621367931367,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0362217016518116,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024754045065492392,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33898064494132996,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40483956336975097,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.500599205493927,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03389806374907493,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16990665201284244,
|
|
"calibration/batch_distribution_entropy": 0.9763861379418843,
|
|
"calibration/buffer_distribution_entropy": 0.9756461996279331,
|
|
"calibration/confidence_entropy": 0.4907997092983952,
|
|
"calibration/coverage@0%": 0.04279541816108065,
|
|
"calibration/coverage@1%": 0.04279541816108065,
|
|
"calibration/coverage@10%": 0.3558397094996633,
|
|
"calibration/coverage@15%": 0.48278590372538516,
|
|
"calibration/coverage@20%": 0.6133848323851534,
|
|
"calibration/coverage@25%": 0.8433561312198542,
|
|
"calibration/coverage@30%": 0.8935762652705062,
|
|
"calibration/coverage@5%": 0.19339417144192056,
|
|
"calibration/ece": 0.1626894563658173,
|
|
"calibration/mean_confidence": 0.5491100531123216,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0057291666666666515,
|
|
"completions/max_length": 3678.0,
|
|
"completions/max_terminated_length": 3678.0,
|
|
"completions/mean_length": 815.7873168945313,
|
|
"completions/mean_terminated_length": 820.5162841796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 215.8,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.0038923481479287148,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.0136,
|
|
"num_tokens": 415130718.0,
|
|
"reward": 0.993002200126648,
|
|
"reward_std": 0.11667114496231079,
|
|
"rewards/accuracy_reward": 0.6849826335906982,
|
|
"rewards/brier_reward": 0.8041805863380432,
|
|
"rewards/confidence_uniqueness_reward": 0.9449598073959351,
|
|
"rewards/format_reward": 0.9940104246139526,
|
|
"rewards/frontier_coverage_0": 0.023747061751782893,
|
|
"rewards/frontier_coverage_1": 0.023747061751782893,
|
|
"rewards/frontier_coverage_10": 0.023766111955046652,
|
|
"rewards/frontier_coverage_15": 0.027177707105875016,
|
|
"rewards/frontier_coverage_20": 0.07301479429006577,
|
|
"rewards/frontier_coverage_25": 0.1472606360912323,
|
|
"rewards/frontier_coverage_5": 0.023747061751782893,
|
|
"rewards/frontier_entropy_batch_reward": -0.26305546462535856,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14096679538488388,
|
|
"signal/accuracy_reward/group_std_mean": 0.19119617640972136,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9744200944900513,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07048339769244194,
|
|
"signal/advantage_abs_mean": 0.7451924324035645,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08635586649179458,
|
|
"signal/advantage_pre_scale_std": 0.13891534209251405,
|
|
"signal/advantage_std": 0.9830895900726319,
|
|
"signal/brier_reward/centered_abs_mean": 0.13546755462884902,
|
|
"signal/brier_reward/group_std_mean": 0.17547394037246705,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18838207721710204,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01354675628244877,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02242642156779766,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03652404025197029,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031294023245573045,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022426421754062177,
|
|
"signal/format_reward/centered_abs_mean": 0.010574001539498567,
|
|
"signal/format_reward/group_std_mean": 0.022073457762598992,
|
|
"signal/format_reward/group_zero_std_frac": 0.9000000119209289,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07329605147242546,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0052870007697492834,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1862773597240448,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24120102524757386,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03693324699997902,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026637662667781115,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1862773597240448,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24120102524757386,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03693324699997902,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026637662667781115,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1861777275800705,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2410757929086685,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.036913507431745526,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026623413898050783,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1608198195695877,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20852963030338287,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.031917137652635576,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022997234016656877,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07476909160614013,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09405903220176696,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014968187920749188,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010691980132833123,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1143454447388649,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.145367094874382,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022907671332359315,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00163513976149261,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1862773597240448,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24120102524757386,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03693324699997902,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026637662667781115,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31905388832092285,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39239723086357114,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44702168107032775,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031905388832092284,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14559693191858122,
|
|
"calibration/batch_distribution_entropy": 0.9603313286311146,
|
|
"calibration/buffer_distribution_entropy": 0.9761309450014393,
|
|
"calibration/confidence_entropy": 0.5010567276644734,
|
|
"calibration/coverage@0%": 0.016820017762200986,
|
|
"calibration/coverage@1%": 0.016820017762200986,
|
|
"calibration/coverage@10%": 0.3913710631099086,
|
|
"calibration/coverage@15%": 0.6379543175606383,
|
|
"calibration/coverage@20%": 0.8828092626318101,
|
|
"calibration/coverage@25%": 0.9403797520265744,
|
|
"calibration/coverage@30%": 0.9712192254589695,
|
|
"calibration/coverage@5%": 0.14037182782463625,
|
|
"calibration/ece": 0.1773901327159586,
|
|
"calibration/mean_confidence": 0.5998158055084197,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006944444444444442,
|
|
"completions/max_length": 3106.8,
|
|
"completions/max_terminated_length": 3106.8,
|
|
"completions/mean_length": 786.4341918945313,
|
|
"completions/mean_terminated_length": 791.9395629882813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 230.2,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.0037775628734380007,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0112,
|
|
"num_tokens": 427258248.0,
|
|
"reward": 0.9977959275245667,
|
|
"reward_std": 0.1135590761899948,
|
|
"rewards/accuracy_reward": 0.692968738079071,
|
|
"rewards/brier_reward": 0.8162111163139343,
|
|
"rewards/confidence_uniqueness_reward": 0.9434576988220215,
|
|
"rewards/format_reward": 0.9928819417953492,
|
|
"rewards/frontier_coverage_0": 0.03058276418596506,
|
|
"rewards/frontier_coverage_1": 0.03058276418596506,
|
|
"rewards/frontier_coverage_10": 0.030608633439987896,
|
|
"rewards/frontier_coverage_15": 0.034898260794579986,
|
|
"rewards/frontier_coverage_20": 0.0822305366396904,
|
|
"rewards/frontier_coverage_25": 0.1586170792579651,
|
|
"rewards/frontier_coverage_5": 0.03058276418596506,
|
|
"rewards/frontier_entropy_batch_reward": -0.26789160668849943,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13543294370174408,
|
|
"signal/accuracy_reward/group_std_mean": 0.1795397073030472,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4833333432674408,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9879457116127014,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06771647185087204,
|
|
"signal/advantage_abs_mean": 0.7566372156143188,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08582200407981873,
|
|
"signal/advantage_pre_scale_std": 0.13774674534797668,
|
|
"signal/advantage_std": 0.9830132961273194,
|
|
"signal/brier_reward/centered_abs_mean": 0.12440891414880753,
|
|
"signal/brier_reward/group_std_mean": 0.16094110310077667,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1832536369562149,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012440891563892364,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023261058330535888,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0364607434719801,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0340243112295866,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002326105860993266,
|
|
"signal/format_reward/centered_abs_mean": 0.011458333395421506,
|
|
"signal/format_reward/group_std_mean": 0.021999170631170274,
|
|
"signal/format_reward/group_zero_std_frac": 0.9027777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.08310093134641647,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005729166697710753,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1727170765399933,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22679380774497987,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03630736693739891,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002469854103401303,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1727170765399933,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22679380774497987,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03630736693739891,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002469854103401303,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17263288795948029,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22668661475181578,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03628960847854614,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024686503689736127,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13041841089725495,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.172030445933342,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027530809864401817,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018649833044037222,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07613101899623871,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09484113454818725,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016059026680886747,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010886735515668988,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11801900565624238,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14812451601028442,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024889787659049034,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016876716865226627,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1727170765399933,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22679380774497987,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03630736693739891,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002469854103401303,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31580972075462344,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38427644968032837,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46509563326835635,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03158097080886364,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 0.16057820298915118,
|
|
"eval_calibration/batch_distribution_entropy": 0.914145618708674,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9753044494695922,
|
|
"eval_calibration/confidence_entropy": 0.4683044711172521,
|
|
"eval_calibration/coverage@0%": 0.22395833333333334,
|
|
"eval_calibration/coverage@1%": 0.22395833333333334,
|
|
"eval_calibration/coverage@10%": 0.5677083333333334,
|
|
"eval_calibration/coverage@15%": 0.721606182795699,
|
|
"eval_calibration/coverage@20%": 0.81065188172043,
|
|
"eval_calibration/coverage@25%": 0.873991935483871,
|
|
"eval_calibration/coverage@30%": 0.947244623655914,
|
|
"eval_calibration/coverage@5%": 0.2708333333333333,
|
|
"eval_calibration/ece": 0.2455773573190524,
|
|
"eval_calibration/mean_confidence": 0.5615903357669692,
|
|
"eval_completions/clipped_ratio": 0.002604166666666685,
|
|
"eval_completions/max_length": 2298.3333333333335,
|
|
"eval_completions/max_terminated_length": 2298.3333333333335,
|
|
"eval_completions/mean_length": 798.5604349772135,
|
|
"eval_completions/mean_terminated_length": 800.6705627441406,
|
|
"eval_completions/min_length": 134.0,
|
|
"eval_completions/min_terminated_length": 276.1666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 427258248.0,
|
|
"eval_reward": 0.9212540686130524,
|
|
"eval_reward_std": 0.2215366984407107,
|
|
"eval_rewards/accuracy_reward": 0.6866319378217062,
|
|
"eval_rewards/brier_reward": 0.8282056351502737,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8917240798473358,
|
|
"eval_rewards/format_reward": 0.9973958432674408,
|
|
"eval_rewards/frontier_coverage_0": 0.047293830662965775,
|
|
"eval_rewards/frontier_coverage_1": 0.047293830662965775,
|
|
"eval_rewards/frontier_coverage_10": 0.04729795269668102,
|
|
"eval_rewards/frontier_coverage_15": 0.04440750305851301,
|
|
"eval_rewards/frontier_coverage_20": 0.08812103296319644,
|
|
"eval_rewards/frontier_coverage_25": 0.16687769691149393,
|
|
"eval_rewards/frontier_coverage_5": 0.047292555992801986,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9973958432674408,
|
|
"eval_runtime": 166.4081,
|
|
"eval_samples_per_second": 6.009,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4191080729166667,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4639366815487544,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9535810748736063,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20955403645833334,
|
|
"eval_signal/advantage_abs_mean": 0.8791789809862772,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.1954043780763944,
|
|
"eval_signal/advantage_pre_scale_std": 0.21952204157908758,
|
|
"eval_signal/advantage_std": 0.9863749047120413,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.16421432544787726,
|
|
"eval_signal/brier_reward/group_std_mean": 0.22416182110706964,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0745612805088361,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.016421433072537184,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04440143456061681,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06044746252397696,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020217653984824818,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004440143549193938,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376,
|
|
"eval_signal/format_reward/group_std_mean": 0.014731391333043575,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9166666865348816,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011211627162992954,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.296345055103302,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4005911747614543,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01929074029127757,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004237734169388811,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.296345055103302,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4005911747614543,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01929074029127757,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004237734169388811,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.296173761288325,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.40038461486498517,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0192795991897583,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004235284713407357,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.17665722717841467,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.24827261020739874,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01150304094577829,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025261982421701155,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10272979860504468,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.13188674176732698,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006687632451454799,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014690360403619707,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.19587110231320062,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.24194891502459845,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01274662526945273,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002800956523666779,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.29633869727452594,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.40058427552382153,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019290315608183544,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004237643443048,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0022423254946867623,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045572955471774,
|
|
"eval_steps_per_second": 0.036,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1186967407697009,
|
|
"calibration/batch_distribution_entropy": 0.9550775718290982,
|
|
"calibration/buffer_distribution_entropy": 0.9759632840081753,
|
|
"calibration/confidence_entropy": 0.47419539528335974,
|
|
"calibration/coverage@0%": 0.026565226876090753,
|
|
"calibration/coverage@1%": 0.026565226876090753,
|
|
"calibration/coverage@10%": 0.4854984729493892,
|
|
"calibration/coverage@15%": 0.8464511950409872,
|
|
"calibration/coverage@20%": 0.9174506613422097,
|
|
"calibration/coverage@25%": 0.9550392670157068,
|
|
"calibration/coverage@30%": 0.9853403141361257,
|
|
"calibration/coverage@5%": 0.3022660340314136,
|
|
"calibration/ece": 0.18733712170138508,
|
|
"calibration/mean_confidence": 0.6067382862272922,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.004947916666666652,
|
|
"completions/max_length": 3626.8,
|
|
"completions/max_terminated_length": 3626.8,
|
|
"completions/mean_length": 804.612939453125,
|
|
"completions/mean_terminated_length": 808.594140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 237.4,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 0.0037702268455177546,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0034,
|
|
"num_tokens": 439593341.0,
|
|
"reward": 1.0242285490036012,
|
|
"reward_std": 0.11114266216754913,
|
|
"rewards/accuracy_reward": 0.7535590410232544,
|
|
"rewards/brier_reward": 0.8131016492843628,
|
|
"rewards/confidence_uniqueness_reward": 0.94401034116745,
|
|
"rewards/format_reward": 0.9950520753860473,
|
|
"rewards/frontier_coverage_0": -0.009763723891228437,
|
|
"rewards/frontier_coverage_1": -0.009763723891228437,
|
|
"rewards/frontier_coverage_10": -0.00971116297878325,
|
|
"rewards/frontier_coverage_15": 0.01904887929558754,
|
|
"rewards/frontier_coverage_20": 0.0988022267818451,
|
|
"rewards/frontier_coverage_25": 0.18960395753383635,
|
|
"rewards/frontier_coverage_5": -0.0097591457888484,
|
|
"rewards/frontier_entropy_batch_reward": -0.29627181887626647,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13992513120174407,
|
|
"signal/accuracy_reward/group_std_mean": 0.18550328016281128,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0376464486122132,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06996256560087204,
|
|
"signal/advantage_abs_mean": 0.7619848847389221,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08387549370527267,
|
|
"signal/advantage_pre_scale_std": 0.1350790113210678,
|
|
"signal/advantage_std": 0.9830043315887451,
|
|
"signal/brier_reward/centered_abs_mean": 0.129868845641613,
|
|
"signal/brier_reward/group_std_mean": 0.1658725470304489,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1923790842294693,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012986884266138077,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02169004678726196,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03326268345117569,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03214513845741749,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002169004688039422,
|
|
"signal/format_reward/centered_abs_mean": 0.008241102285683154,
|
|
"signal/format_reward/group_std_mean": 0.01677692960947752,
|
|
"signal/format_reward/group_zero_std_frac": 0.9250000238418579,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06075965389609337,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004120551142841577,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18534817099571227,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23780874013900757,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039334161579608916,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026504788547754288,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18534817099571227,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23780874013900757,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039334161579608916,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026504788547754288,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1850076824426651,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23738227784633636,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039263205230236055,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026456098072230815,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09948588758707047,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12873928546905516,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021109068393707277,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014226482482627035,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08255493640899658,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10234281718730927,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01751541830599308,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011805356247350574,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12076869606971741,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15139889121055602,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025616540387272835,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017269923351705073,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18533942103385925,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2377980649471283,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03933229818940163,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002650353778153658,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3371506452560425,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40471735000610354,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5003003001213073,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03371506631374359,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1272282988853613,
|
|
"calibration/batch_distribution_entropy": 0.9597042176053079,
|
|
"calibration/buffer_distribution_entropy": 0.9764644981690312,
|
|
"calibration/confidence_entropy": 0.4988459667423297,
|
|
"calibration/coverage@0%": 0.03220648332796521,
|
|
"calibration/coverage@1%": 0.03220648332796521,
|
|
"calibration/coverage@10%": 0.42679129860725634,
|
|
"calibration/coverage@15%": 0.716446344990166,
|
|
"calibration/coverage@20%": 0.8498751624715318,
|
|
"calibration/coverage@25%": 0.9440389794617708,
|
|
"calibration/coverage@30%": 0.9772528433945756,
|
|
"calibration/coverage@5%": 0.08715098602230857,
|
|
"calibration/ece": 0.13103508682212492,
|
|
"calibration/mean_confidence": 0.6140400005582972,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0028935185185185266,
|
|
"completions/max_length": 3365.0,
|
|
"completions/max_terminated_length": 3365.0,
|
|
"completions/mean_length": 805.1265869140625,
|
|
"completions/mean_terminated_length": 807.4395955403646,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.0,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 447024424.0,
|
|
"reward": 1.0055522521336873,
|
|
"reward_std": 0.10900399088859558,
|
|
"rewards/accuracy_reward": 0.7063078681627909,
|
|
"rewards/brier_reward": 0.8075371583302816,
|
|
"rewards/confidence_uniqueness_reward": 0.9471040964126587,
|
|
"rewards/format_reward": 0.9971064925193787,
|
|
"rewards/frontier_coverage_0": 0.008651394241799911,
|
|
"rewards/frontier_coverage_1": 0.008651394241799911,
|
|
"rewards/frontier_coverage_10": 0.008720822011431059,
|
|
"rewards/frontier_coverage_15": 0.03024888038635254,
|
|
"rewards/frontier_coverage_20": 0.09493551154931386,
|
|
"rewards/frontier_coverage_25": 0.173322523633639,
|
|
"rewards/frontier_coverage_5": 0.008652187573413054,
|
|
"rewards/frontier_entropy_batch_reward": -0.2638363142808278,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1340241606036822,
|
|
"signal/accuracy_reward/group_std_mean": 0.18462320665518442,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4583333333333333,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0006801684697468,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0670120803018411,
|
|
"signal/advantage_abs_mean": 0.7406850457191467,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07968033105134964,
|
|
"signal/advantage_pre_scale_std": 0.12958685557047525,
|
|
"signal/advantage_std": 0.9829863905906677,
|
|
"signal/brier_reward/centered_abs_mean": 0.12960121283928552,
|
|
"signal/brier_reward/group_std_mean": 0.16745843489964804,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19411064187685648,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012960121346016725,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01816164267559846,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02970569891234239,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027298261721928913,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001816164197710653,
|
|
"signal/format_reward/centered_abs_mean": 0.0055157696673025685,
|
|
"signal/format_reward/group_std_mean": 0.014287550002336502,
|
|
"signal/format_reward/group_zero_std_frac": 0.9259259502092997,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04154850294192632,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0027578848336512842,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17834581434726715,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23521957298119864,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038279421627521515,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00255034522463878,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17834581434726715,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23521957298119864,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038279421627521515,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00255034522463878,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17744634052117667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23404847085475922,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.038087598979473114,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025374825733403363,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07600981990496318,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.099979134897391,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016342710082729656,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010869404068216681,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08222619444131851,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10303841282924016,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01762464890877406,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011758345644921064,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12105090419451396,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15320136646429697,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02592242571214835,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017310279266287882,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17833813031514487,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23520942529042563,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0382777601480484,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002550235173354546,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3061721622943878,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37579457958539325,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4601670801639557,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03061721660196781,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.022534527126341485,
|
|
"train_runtime": 39191.2693,
|
|
"train_samples_per_second": 0.383,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 447024424,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|