8085 lines
491 KiB
JSON
8085 lines
491 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 0.9984,
|
||
|
|
"eval_steps": 50,
|
||
|
|
"global_step": 312,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.6291125962699202,
|
||
|
|
"calibration/batch_distribution_entropy": 0.6579274024264119,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.6317432625270244,
|
||
|
|
"calibration/confidence_entropy": 0.3463624208288166,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.49361065763556233,
|
||
|
|
"calibration/mean_confidence": 0.7892375473378748,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.03330078125,
|
||
|
|
"completions/max_length": 1506.2,
|
||
|
|
"completions/max_terminated_length": 1506.2,
|
||
|
|
"completions/mean_length": 217.56533203125,
|
||
|
|
"completions/mean_terminated_length": 225.04311828613282,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 2.0,
|
||
|
|
"epoch": 0.016,
|
||
|
|
"grad_norm": 0.03844146803021431,
|
||
|
|
"learning_rate": 3.1249999999999997e-07,
|
||
|
|
"loss": 0.0107,
|
||
|
|
"num_tokens": 17071901.0,
|
||
|
|
"reward": 0.5538008451461792,
|
||
|
|
"reward_std": 0.4007190465927124,
|
||
|
|
"rewards/accuracy_reward": 0.2220703125,
|
||
|
|
"rewards/brier_reward": 0.37496077418327334,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.48197067975997926,
|
||
|
|
"rewards/format_reward": 0.684375,
|
||
|
|
"rewards/frontier_coverage_0": 0.14870173409581183,
|
||
|
|
"rewards/frontier_coverage_1": 0.14870173409581183,
|
||
|
|
"rewards/frontier_coverage_10": 0.14870173409581183,
|
||
|
|
"rewards/frontier_coverage_15": 0.14870173409581183,
|
||
|
|
"rewards/frontier_coverage_20": 0.14870173409581183,
|
||
|
|
"rewards/frontier_coverage_25": 0.14870173409581183,
|
||
|
|
"rewards/frontier_coverage_5": 0.14870173409581183,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.23719482421875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.28042069971561434,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.118597412109375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.118597412109375,
|
||
|
|
"signal/advantage_abs_mean": 0.34127363562583923,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.34127363562583923,
|
||
|
|
"signal/advantage_pre_scale_std": 0.40609942078590394,
|
||
|
|
"signal/advantage_std": 0.40609942078590394,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.31728167533874513,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.36332341432571413,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031728167459368704,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.031728167459368704,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.29734116792678833,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.34809759855270384,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029734116792678834,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029734116792678834,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.4034423828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.45386149883270266,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.20172119140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.20172119140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15856626331806184,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20916377156972885,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.003125,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15856626331806184,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20916377156972885,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15856626331806184,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20916377156972885,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15856626331806184,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20916377156972885,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15856626331806184,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20916377156972885,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15856626331806184,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20916377156972885,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15856626331806184,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20916377156972885,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002267497556749731,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.6663210426183143,
|
||
|
|
"calibration/batch_distribution_entropy": 0.6396787062068319,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.6579678305027443,
|
||
|
|
"calibration/confidence_entropy": 0.34138574234262214,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.5356770471461247,
|
||
|
|
"calibration/mean_confidence": 0.7990239861496942,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0384765625,
|
||
|
|
"completions/max_length": 1505.0,
|
||
|
|
"completions/max_terminated_length": 1505.0,
|
||
|
|
"completions/mean_length": 207.08623046875,
|
||
|
|
"completions/mean_terminated_length": 215.4260681152344,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 1.8,
|
||
|
|
"epoch": 0.032,
|
||
|
|
"grad_norm": 0.02313925139605999,
|
||
|
|
"learning_rate": 6.249999999999999e-07,
|
||
|
|
"loss": 0.0036,
|
||
|
|
"num_tokens": 34292816.0,
|
||
|
|
"reward": 0.5559734106063843,
|
||
|
|
"reward_std": 0.3749539077281952,
|
||
|
|
"rewards/accuracy_reward": 0.2087890625,
|
||
|
|
"rewards/brier_reward": 0.37693819403648376,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.5149972319602967,
|
||
|
|
"rewards/format_reward": 0.71552734375,
|
||
|
|
"rewards/frontier_coverage_0": 0.0461702860891819,
|
||
|
|
"rewards/frontier_coverage_1": 0.0461702860891819,
|
||
|
|
"rewards/frontier_coverage_10": 0.0461702860891819,
|
||
|
|
"rewards/frontier_coverage_15": 0.0461702860891819,
|
||
|
|
"rewards/frontier_coverage_20": 0.0461702860891819,
|
||
|
|
"rewards/frontier_coverage_25": 0.0461702860891819,
|
||
|
|
"rewards/frontier_coverage_5": 0.0461702860891819,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2244140625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2725442975759506,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11220703125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11220703125,
|
||
|
|
"signal/advantage_abs_mean": 0.31165753602981566,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.31165753602981566,
|
||
|
|
"signal/advantage_pre_scale_std": 0.379498028755188,
|
||
|
|
"signal/advantage_std": 0.379498028755188,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.30591880679130556,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.35432358980178835,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030591881647706032,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.030591881647706032,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.28595311045646665,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.342820942401886,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02859531156718731,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02859531156718731,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.380755615234375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.4403470873832703,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1903778076171875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1903778076171875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.07552953511476516,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.125508613884449,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07552953511476516,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.125508613884449,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07552953511476516,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.125508613884449,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07552953511476516,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.125508613884449,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07552953511476516,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.125508613884449,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07552953511476516,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.125508613884449,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07552953511476516,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.125508613884449,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010800723102875054,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.6035156031942368,
|
||
|
|
"calibration/batch_distribution_entropy": 0.6349305603027458,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.6554486012564013,
|
||
|
|
"calibration/confidence_entropy": 0.34504651675502557,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.46748896032825493,
|
||
|
|
"calibration/mean_confidence": 0.8060340434356423,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.01787109375,
|
||
|
|
"completions/max_length": 1460.4,
|
||
|
|
"completions/max_terminated_length": 1460.4,
|
||
|
|
"completions/mean_length": 174.453125,
|
||
|
|
"completions/mean_terminated_length": 177.67768249511718,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 17.6,
|
||
|
|
"epoch": 0.048,
|
||
|
|
"grad_norm": 0.016342012211680412,
|
||
|
|
"learning_rate": 9.374999999999999e-07,
|
||
|
|
"loss": 0.0069,
|
||
|
|
"num_tokens": 51127952.0,
|
||
|
|
"reward": 0.6984765291213989,
|
||
|
|
"reward_std": 0.28357043862342834,
|
||
|
|
"rewards/accuracy_reward": 0.27353515625,
|
||
|
|
"rewards/brier_reward": 0.4843921780586243,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.6443804860115051,
|
||
|
|
"rewards/format_reward": 0.8849609375,
|
||
|
|
"rewards/frontier_coverage_0": 0.06344863995909691,
|
||
|
|
"rewards/frontier_coverage_1": 0.06344863995909691,
|
||
|
|
"rewards/frontier_coverage_10": 0.06344863995909691,
|
||
|
|
"rewards/frontier_coverage_15": 0.06344863995909691,
|
||
|
|
"rewards/frontier_coverage_20": 0.06344863995909691,
|
||
|
|
"rewards/frontier_coverage_25": 0.06344863995909691,
|
||
|
|
"rewards/frontier_coverage_5": 0.06344863995909691,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.205230712890625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.25404774844646455,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.340625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1026153564453125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1026153564453125,
|
||
|
|
"signal/advantage_abs_mean": 0.20717814862728118,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.20717814862728118,
|
||
|
|
"signal/advantage_pre_scale_std": 0.29353936910629275,
|
||
|
|
"signal/advantage_std": 0.29353936910629275,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.2758757948875427,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.33000465035438536,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02758757919073105,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02758757919073105,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19257701933383942,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2580618649721146,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01925770305097103,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01925770305097103,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.1892578125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.29168896079063417,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.0625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.09462890625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.09462890625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.09350458979606628,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.14712692201137542,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09350458979606628,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.14712692201137542,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09350458979606628,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14712692201137542,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09350458979606628,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14712692201137542,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09350458979606628,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14712692201137542,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09350458979606628,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14712692201137542,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09350458979606628,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.14712692201137542,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013371156295761467,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.5439649409788888,
|
||
|
|
"calibration/batch_distribution_entropy": 0.6677070099797058,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.6549928867820553,
|
||
|
|
"calibration/confidence_entropy": 0.35836314133723945,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.4115229909417037,
|
||
|
|
"calibration/mean_confidence": 0.8007890667770411,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0044921875,
|
||
|
|
"completions/max_length": 1172.0,
|
||
|
|
"completions/max_terminated_length": 1172.0,
|
||
|
|
"completions/mean_length": 131.76064453125,
|
||
|
|
"completions/mean_terminated_length": 132.35692749023437,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 31.6,
|
||
|
|
"epoch": 0.064,
|
||
|
|
"grad_norm": 0.0032985019497573376,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0021,
|
||
|
|
"num_tokens": 67395581.0,
|
||
|
|
"reward": 0.7972017049789428,
|
||
|
|
"reward_std": 0.18594848811626435,
|
||
|
|
"rewards/accuracy_reward": 0.34326171875,
|
||
|
|
"rewards/brier_reward": 0.5591598868370056,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.7354918837547302,
|
||
|
|
"rewards/format_reward": 0.9814453125,
|
||
|
|
"rewards/frontier_coverage_0": 0.053776346147060394,
|
||
|
|
"rewards/frontier_coverage_1": 0.053776346147060394,
|
||
|
|
"rewards/frontier_coverage_10": 0.053776346147060394,
|
||
|
|
"rewards/frontier_coverage_15": 0.053776346147060394,
|
||
|
|
"rewards/frontier_coverage_20": 0.053776346147060394,
|
||
|
|
"rewards/frontier_coverage_25": 0.053776346147060394,
|
||
|
|
"rewards/frontier_coverage_5": 0.053776346147060394,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.200811767578125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2528608232736588,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.33125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1004058837890625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1004058837890625,
|
||
|
|
"signal/advantage_abs_mean": 0.13703744262456893,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.13703744262456893,
|
||
|
|
"signal/advantage_pre_scale_std": 0.20460715293884277,
|
||
|
|
"signal/advantage_std": 0.20460715293884277,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.23956941366195678,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.295298820734024,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023956941068172456,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023956941068172456,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1292675703763962,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16289211213588714,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012926757708191872,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012926757708191872,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.03507080078125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.08618362993001938,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.571875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017535400390625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017535400390625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.09282867759466171,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.14971067011356354,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09282867759466171,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.14971067011356354,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09282867759466171,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14971067011356354,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09282867759466171,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14971067011356354,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09282867759466171,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14971067011356354,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09282867759466171,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14971067011356354,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09282867759466171,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.14971067011356354,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013274500845000148,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.6445319959277507,
|
||
|
|
"calibration/batch_distribution_entropy": 0.7455506382256587,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.6783067089120915,
|
||
|
|
"calibration/confidence_entropy": 0.41936467109944864,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.4475651051804454,
|
||
|
|
"calibration/mean_confidence": 0.7565154097650527,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.001171875,
|
||
|
|
"completions/max_length": 698.6,
|
||
|
|
"completions/max_terminated_length": 698.6,
|
||
|
|
"completions/mean_length": 109.54462890625,
|
||
|
|
"completions/mean_terminated_length": 109.67274932861328,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 36.6,
|
||
|
|
"epoch": 0.08,
|
||
|
|
"grad_norm": 0.006463681813329458,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0026,
|
||
|
|
"num_tokens": 83450470.0,
|
||
|
|
"reward": 0.8225439548492431,
|
||
|
|
"reward_std": 0.15419970750808715,
|
||
|
|
"rewards/accuracy_reward": 0.36044921875,
|
||
|
|
"rewards/brier_reward": 0.5978257536888123,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.8047454833984375,
|
||
|
|
"rewards/format_reward": 0.99375,
|
||
|
|
"rewards/frontier_coverage_0": 0.051820401847362516,
|
||
|
|
"rewards/frontier_coverage_1": 0.051820401847362516,
|
||
|
|
"rewards/frontier_coverage_10": 0.051820401847362516,
|
||
|
|
"rewards/frontier_coverage_15": 0.051820401847362516,
|
||
|
|
"rewards/frontier_coverage_20": 0.051820401847362516,
|
||
|
|
"rewards/frontier_coverage_25": 0.051820401847362516,
|
||
|
|
"rewards/frontier_coverage_5": 0.051820401847362516,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.184283447265625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.2312079608440399,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.384375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0921417236328125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0921417236328125,
|
||
|
|
"signal/advantage_abs_mean": 0.11761517375707627,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.11761517375707627,
|
||
|
|
"signal/advantage_pre_scale_std": 0.17681845128536225,
|
||
|
|
"signal/advantage_std": 0.17681845128536225,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.2217628002166748,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.27526147961616515,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0221762802451849,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0221762802451849,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07582313790917397,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10242749750614166,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007582314219325781,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007582314219325781,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.012060546875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.03401010446250439,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.8125,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0060302734375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0060302734375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.11067185401916504,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.16877196729183197,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11067185401916504,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16877196729183197,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11067185401916504,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16877196729183197,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11067185401916504,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16877196729183197,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11067185401916504,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16877196729183197,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11067185401916504,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16877196729183197,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11067185401916504,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16877196729183197,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001582607487216592,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.6309762117309875,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8369892130203398,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.7564496460395704,
|
||
|
|
"calibration/confidence_entropy": 0.5035340910722915,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.0,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.35330436394613157,
|
||
|
|
"calibration/mean_confidence": 0.6835460635754658,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 811.2,
|
||
|
|
"completions/max_terminated_length": 811.2,
|
||
|
|
"completions/mean_length": 111.26328125,
|
||
|
|
"completions/mean_terminated_length": 111.37155609130859,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 41.2,
|
||
|
|
"epoch": 0.096,
|
||
|
|
"grad_norm": 0.00296254875138402,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.002,
|
||
|
|
"num_tokens": 99634414.0,
|
||
|
|
"reward": 0.8360645532608032,
|
||
|
|
"reward_std": 0.13758190870285034,
|
||
|
|
"rewards/accuracy_reward": 0.36787109375,
|
||
|
|
"rewards/brier_reward": 0.6485287547111511,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.8281476020812988,
|
||
|
|
"rewards/format_reward": 0.99609375,
|
||
|
|
"rewards/frontier_coverage_0": 0.064080910384655,
|
||
|
|
"rewards/frontier_coverage_1": 0.064080910384655,
|
||
|
|
"rewards/frontier_coverage_10": 0.064080910384655,
|
||
|
|
"rewards/frontier_coverage_15": 0.064080910384655,
|
||
|
|
"rewards/frontier_coverage_20": 0.064080910384655,
|
||
|
|
"rewards/frontier_coverage_25": 0.064080910384655,
|
||
|
|
"rewards/frontier_coverage_5": 0.064080910384655,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.167340087890625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.21798568665981294,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0836700439453125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0836700439453125,
|
||
|
|
"signal/advantage_abs_mean": 0.1038191020488739,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.1038191020488739,
|
||
|
|
"signal/advantage_pre_scale_std": 0.15961573123931885,
|
||
|
|
"signal/advantage_std": 0.15961573123931885,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.20211856961250305,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.24915991723537445,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020211857557296754,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020211857557296754,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0711653858423233,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09434090554714203,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007116538938134909,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007116538938134909,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0074951171875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.020079236291348935,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.89375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00374755859375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00374755859375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13252932876348494,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1887018859386444,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13252932876348494,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1887018859386444,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13252932876348494,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1887018859386444,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13252932876348494,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1887018859386444,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13252932876348494,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1887018859386444,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13252932876348494,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1887018859386444,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13252932876348494,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1887018859386444,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018951693316921591,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.4826404430899339,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8675797481644141,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8366443607115505,
|
||
|
|
"calibration/confidence_entropy": 0.5551013133970907,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.009025816552062868,
|
||
|
|
"calibration/coverage@25%": 0.009811671168958742,
|
||
|
|
"calibration/coverage@30%": 0.03527587992937305,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.19176894753818877,
|
||
|
|
"calibration/mean_confidence": 0.6017602714878476,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.000390625,
|
||
|
|
"completions/max_length": 531.8,
|
||
|
|
"completions/max_terminated_length": 531.8,
|
||
|
|
"completions/mean_length": 119.00185546875,
|
||
|
|
"completions/mean_terminated_length": 119.04827575683593,
|
||
|
|
"completions/min_length": 18.2,
|
||
|
|
"completions/min_terminated_length": 44.0,
|
||
|
|
"epoch": 0.112,
|
||
|
|
"grad_norm": 0.0017838370986282825,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"num_tokens": 115962465.0,
|
||
|
|
"reward": 0.8670443058013916,
|
||
|
|
"reward_std": 0.13026245534420014,
|
||
|
|
"rewards/accuracy_reward": 0.41376953125,
|
||
|
|
"rewards/brier_reward": 0.7080369591712952,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.8345065712928772,
|
||
|
|
"rewards/format_reward": 0.99775390625,
|
||
|
|
"rewards/frontier_coverage_0": 0.07021187543869019,
|
||
|
|
"rewards/frontier_coverage_1": 0.07021187543869019,
|
||
|
|
"rewards/frontier_coverage_10": 0.07021187543869019,
|
||
|
|
"rewards/frontier_coverage_15": 0.07021187543869019,
|
||
|
|
"rewards/frontier_coverage_20": 0.07021187543869019,
|
||
|
|
"rewards/frontier_coverage_25": 0.07021187543869019,
|
||
|
|
"rewards/frontier_coverage_5": 0.07021187543869019,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.171795654296875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.22035529017448424,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.39375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0858978271484375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0858978271484375,
|
||
|
|
"signal/advantage_abs_mean": 0.10090952962636948,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.10090952962636948,
|
||
|
|
"signal/advantage_pre_scale_std": 0.15009926557540892,
|
||
|
|
"signal/advantage_std": 0.15009926557540892,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.17961066961288452,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.2244138687849045,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017961067706346513,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017961067706346513,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09093757271766663,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1149788647890091,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009093757718801498,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009093757718801498,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.004351806640625,
|
||
|
|
"signal/format_reward/group_std_mean": 0.012705824617296458,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.928125,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021759033203125,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0021759033203125,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17380160391330718,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2313144624233246,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17380160391330718,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2313144624233246,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17380160391330718,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2313144624233246,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17380160391330718,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2313144624233246,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17380160391330718,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2313144624233246,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17380160391330718,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2313144624233246,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17380160391330718,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2313144624233246,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00248536285944283,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.5205592590994277,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8709230881186457,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.880652703907203,
|
||
|
|
"calibration/confidence_entropy": 0.5875897956550009,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.0,
|
||
|
|
"calibration/coverage@15%": 0.0,
|
||
|
|
"calibration/coverage@20%": 0.0,
|
||
|
|
"calibration/coverage@25%": 0.0,
|
||
|
|
"calibration/coverage@30%": 0.01761252446183953,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.1345443869115383,
|
||
|
|
"calibration/mean_confidence": 0.48074756249087625,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00068359375,
|
||
|
|
"completions/max_length": 429.2,
|
||
|
|
"completions/max_terminated_length": 429.2,
|
||
|
|
"completions/mean_length": 127.03271484375,
|
||
|
|
"completions/mean_terminated_length": 127.1189453125,
|
||
|
|
"completions/min_length": 10.4,
|
||
|
|
"completions/min_terminated_length": 48.6,
|
||
|
|
"epoch": 0.128,
|
||
|
|
"grad_norm": 0.0014243983896449208,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0012,
|
||
|
|
"num_tokens": 132179952.0,
|
||
|
|
"reward": 0.87330641746521,
|
||
|
|
"reward_std": 0.11022595316171646,
|
||
|
|
"rewards/accuracy_reward": 0.412109375,
|
||
|
|
"rewards/brier_reward": 0.7398777008056641,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.8477198123931885,
|
||
|
|
"rewards/format_reward": 0.99873046875,
|
||
|
|
"rewards/frontier_coverage_0": 0.09117618799209595,
|
||
|
|
"rewards/frontier_coverage_1": 0.09117618799209595,
|
||
|
|
"rewards/frontier_coverage_10": 0.09117618799209595,
|
||
|
|
"rewards/frontier_coverage_15": 0.09117618799209595,
|
||
|
|
"rewards/frontier_coverage_20": 0.09117618799209595,
|
||
|
|
"rewards/frontier_coverage_25": 0.09117618799209595,
|
||
|
|
"rewards/frontier_coverage_5": 0.09117618799209595,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15081787109375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.19553602039813994,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.453125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.075408935546875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.075408935546875,
|
||
|
|
"signal/advantage_abs_mean": 0.08564974516630172,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.08564974516630172,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1290176823735237,
|
||
|
|
"signal/advantage_std": 0.1290176823735237,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.16081323921680452,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.2026536852121353,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01608132477849722,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01608132477849722,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08570207059383392,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10545764565467834,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00857020691037178,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00857020691037178,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002459716796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.007181552890688181,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.959375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20869247913360595,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26470946073532103,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20869247913360595,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26470946073532103,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20869247913360595,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26470946073532103,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20869247913360595,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26470946073532103,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20869247913360595,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26470946073532103,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20869247913360595,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26470946073532103,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20869247913360595,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26470946073532103,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002984302304685116,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3661014364381845,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8654614858918537,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8948939325024583,
|
||
|
|
"calibration/confidence_entropy": 0.5692098412044415,
|
||
|
|
"calibration/coverage@0%": 0.007436399217221135,
|
||
|
|
"calibration/coverage@1%": 0.007436399217221135,
|
||
|
|
"calibration/coverage@10%": 0.03444227005870841,
|
||
|
|
"calibration/coverage@15%": 0.05870841487279843,
|
||
|
|
"calibration/coverage@20%": 0.09041095890410958,
|
||
|
|
"calibration/coverage@25%": 0.213695572407045,
|
||
|
|
"calibration/coverage@30%": 0.30481286692759296,
|
||
|
|
"calibration/coverage@5%": 0.007436399217221135,
|
||
|
|
"calibration/ece": 0.23127731773099808,
|
||
|
|
"calibration/mean_confidence": 0.40613134023133063,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00048828125,
|
||
|
|
"completions/max_length": 361.4,
|
||
|
|
"completions/max_terminated_length": 361.4,
|
||
|
|
"completions/mean_length": 134.53935546875,
|
||
|
|
"completions/mean_terminated_length": 134.60521240234374,
|
||
|
|
"completions/min_length": 11.0,
|
||
|
|
"completions/min_terminated_length": 52.2,
|
||
|
|
"epoch": 0.144,
|
||
|
|
"grad_norm": 0.001966482726857066,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0009,
|
||
|
|
"num_tokens": 148508067.0,
|
||
|
|
"reward": 0.9146429896354675,
|
||
|
|
"reward_std": 0.10571834594011306,
|
||
|
|
"rewards/accuracy_reward": 0.5076171875,
|
||
|
|
"rewards/brier_reward": 0.7293356657028198,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.8530200600624085,
|
||
|
|
"rewards/format_reward": 0.99892578125,
|
||
|
|
"rewards/frontier_coverage_0": 0.031327979266643526,
|
||
|
|
"rewards/frontier_coverage_1": 0.031327979266643526,
|
||
|
|
"rewards/frontier_coverage_10": 0.031327979266643526,
|
||
|
|
"rewards/frontier_coverage_15": 0.031327979266643526,
|
||
|
|
"rewards/frontier_coverage_20": 0.031327979266643526,
|
||
|
|
"rewards/frontier_coverage_25": 0.031327979266643526,
|
||
|
|
"rewards/frontier_coverage_5": 0.031327979266643526,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15548095703125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.20547467172145845,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.077740478515625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.077740478515625,
|
||
|
|
"signal/advantage_abs_mean": 0.08054517805576325,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.08054517805576325,
|
||
|
|
"signal/advantage_pre_scale_std": 0.12199195474386215,
|
||
|
|
"signal/advantage_std": 0.12199195474386215,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.16138457357883454,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.20098112821578978,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016138457320630552,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016138457320630552,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08552988022565841,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10412109196186066,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008552988339215518,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008552988339215518,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002069091796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005740390438586473,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23797725439071654,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2977425754070282,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23797725439071654,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2977425754070282,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23797725439071654,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2977425754070282,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23797725439071654,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2977425754070282,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23797725439071654,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2977425754070282,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23797725439071654,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2977425754070282,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23797725439071654,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2977425754070282,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034030748065561056,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.42311159847761515,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8411299022417736,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8770051162966203,
|
||
|
|
"calibration/confidence_entropy": 0.5590246418593998,
|
||
|
|
"calibration/coverage@0%": 0.0074310481898238745,
|
||
|
|
"calibration/coverage@1%": 0.0074310481898238745,
|
||
|
|
"calibration/coverage@10%": 0.014476057974559687,
|
||
|
|
"calibration/coverage@15%": 0.02190710616438356,
|
||
|
|
"calibration/coverage@20%": 0.029335861056751466,
|
||
|
|
"calibration/coverage@25%": 0.04691398605675147,
|
||
|
|
"calibration/coverage@30%": 0.1594415056262231,
|
||
|
|
"calibration/coverage@5%": 0.0074310481898238745,
|
||
|
|
"calibration/ece": 0.12798997594433206,
|
||
|
|
"calibration/mean_confidence": 0.3582420598922628,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0001953125,
|
||
|
|
"completions/max_length": 459.4,
|
||
|
|
"completions/max_terminated_length": 459.4,
|
||
|
|
"completions/mean_length": 146.5173828125,
|
||
|
|
"completions/mean_terminated_length": 146.54682312011718,
|
||
|
|
"completions/min_length": 34.2,
|
||
|
|
"completions/min_terminated_length": 59.8,
|
||
|
|
"epoch": 0.16,
|
||
|
|
"grad_norm": 0.0011785045498982072,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0006,
|
||
|
|
"num_tokens": 165029333.0,
|
||
|
|
"reward": 0.8984348177909851,
|
||
|
|
"reward_std": 0.0986061379313469,
|
||
|
|
"rewards/accuracy_reward": 0.4591796875,
|
||
|
|
"rewards/brier_reward": 0.748118782043457,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.854450786113739,
|
||
|
|
"rewards/format_reward": 0.999609375,
|
||
|
|
"rewards/frontier_coverage_0": 0.088801159709692,
|
||
|
|
"rewards/frontier_coverage_1": 0.088801159709692,
|
||
|
|
"rewards/frontier_coverage_10": 0.088801159709692,
|
||
|
|
"rewards/frontier_coverage_15": 0.088801159709692,
|
||
|
|
"rewards/frontier_coverage_20": 0.08754920139908791,
|
||
|
|
"rewards/frontier_coverage_25": 0.08266227170825005,
|
||
|
|
"rewards/frontier_coverage_5": 0.088801159709692,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15191650390625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.19589066207408906,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.453125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.075958251953125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.075958251953125,
|
||
|
|
"signal/advantage_abs_mean": 0.07748261094093323,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07748261094093323,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11596368849277497,
|
||
|
|
"signal/advantage_std": 0.11596368849277497,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.15048568248748778,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.18865303993225097,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015048568695783615,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015048568695783615,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0896127089858055,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11197478771209717,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008961271494626999,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008961271494626999,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.24175618290901185,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.30336724519729613,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.24175618290901185,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.30336724519729613,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.24175618290901185,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30336724519729613,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24175618290901185,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30336724519729613,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23931825459003447,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3001833617687225,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034222510643303395,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034222510643303395,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2265275925397873,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2843614399433136,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032393445260822775,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032393445260822775,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.24175618290901185,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.30336724519729613,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003457113401964307,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16,
|
||
|
|
"eval_calibration/aurc": 0.5840922316708607,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.7662788198859265,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.8621926897802064,
|
||
|
|
"eval_calibration/confidence_entropy": 0.5495364859557058,
|
||
|
|
"eval_calibration/coverage@0%": 0.0625,
|
||
|
|
"eval_calibration/coverage@1%": 0.0625,
|
||
|
|
"eval_calibration/coverage@10%": 0.0625,
|
||
|
|
"eval_calibration/coverage@15%": 0.0625,
|
||
|
|
"eval_calibration/coverage@20%": 0.0703125,
|
||
|
|
"eval_calibration/coverage@25%": 0.0703125,
|
||
|
|
"eval_calibration/coverage@30%": 0.109375,
|
||
|
|
"eval_calibration/coverage@5%": 0.0625,
|
||
|
|
"eval_calibration/ece": 0.18527343750000003,
|
||
|
|
"eval_calibration/mean_confidence": 0.35605468749999997,
|
||
|
|
"eval_completions/clipped_ratio": 0.0,
|
||
|
|
"eval_completions/max_length": 322.0,
|
||
|
|
"eval_completions/max_terminated_length": 322.0,
|
||
|
|
"eval_completions/mean_length": 155.77276229858398,
|
||
|
|
"eval_completions/mean_terminated_length": 155.77276229858398,
|
||
|
|
"eval_completions/min_length": 84.0,
|
||
|
|
"eval_completions/min_terminated_length": 84.0,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 165029333.0,
|
||
|
|
"eval_reward": 0.8566556125879288,
|
||
|
|
"eval_reward_std": 0.19755794480443,
|
||
|
|
"eval_rewards/accuracy_reward": 0.3671875,
|
||
|
|
"eval_rewards/brier_reward": 0.7759375870227814,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.7936696708202362,
|
||
|
|
"eval_rewards/format_reward": 0.998046875,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.1754572968930006,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.1754572968930006,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.1754572968930006,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.17472604848444462,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.167778592556715,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.14990984462201595,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.1754572968930006,
|
||
|
|
"eval_runtime": 17.1563,
|
||
|
|
"eval_samples_per_second": 29.144,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.44873046875,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.48020416498184204,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.224365234375,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.224365234375,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.1783592328429222,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.1783592328429222,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.19551436230540276,
|
||
|
|
"eval_signal/advantage_std": 0.19551436230540276,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.17557190358638763,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.22150396928191185,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01755719119682908,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01755719119682908,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.1074361503124237,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12650343775749207,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010743614984676242,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010743614984676242,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3991696313023567,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.46928417682647705,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0057081254199147224,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0057081254199147224,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3991696313023567,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.46928417682647705,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0057081254199147224,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0057081254199147224,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3991696313023567,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.46928417682647705,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0057081254199147224,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0057081254199147224,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3979253023862839,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.46783114969730377,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005690331454388797,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005690331454388797,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3867493271827698,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4546607509255409,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005530515452846885,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005530515452846885,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3621833994984627,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.42486173659563065,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005179222673177719,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005179222673177719,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3991696313023567,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.46928417682647705,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0057081254199147224,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0057081254199147224,
|
||
|
|
"eval_steps_per_second": 0.233,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.4252549136754009,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9125482359893882,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.8660492759848346,
|
||
|
|
"calibration/confidence_entropy": 0.5548617338405014,
|
||
|
|
"calibration/coverage@0%": 0.005087298189823874,
|
||
|
|
"calibration/coverage@1%": 0.005087298189823874,
|
||
|
|
"calibration/coverage@10%": 0.0054786876223091975,
|
||
|
|
"calibration/coverage@15%": 0.0054786876223091975,
|
||
|
|
"calibration/coverage@20%": 0.0054786876223091975,
|
||
|
|
"calibration/coverage@25%": 0.06684809197651663,
|
||
|
|
"calibration/coverage@30%": 0.17233824608610568,
|
||
|
|
"calibration/coverage@5%": 0.005087298189823874,
|
||
|
|
"calibration/ece": 0.17347517265132364,
|
||
|
|
"calibration/mean_confidence": 0.40544332355709123,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.000390625,
|
||
|
|
"completions/max_length": 453.6,
|
||
|
|
"completions/max_terminated_length": 453.6,
|
||
|
|
"completions/mean_length": 159.16650390625,
|
||
|
|
"completions/mean_terminated_length": 159.22799682617188,
|
||
|
|
"completions/min_length": 28.4,
|
||
|
|
"completions/min_terminated_length": 66.8,
|
||
|
|
"epoch": 0.176,
|
||
|
|
"grad_norm": 0.0013124002143740654,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0003,
|
||
|
|
"num_tokens": 181896318.0,
|
||
|
|
"reward": 0.8989975333213807,
|
||
|
|
"reward_std": 0.10167965888977051,
|
||
|
|
"rewards/accuracy_reward": 0.45263671875,
|
||
|
|
"rewards/brier_reward": 0.7518372178077698,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.8766632199287414,
|
||
|
|
"rewards/format_reward": 0.99912109375,
|
||
|
|
"rewards/frontier_coverage_0": 0.10452535524964332,
|
||
|
|
"rewards/frontier_coverage_1": 0.10452535524964332,
|
||
|
|
"rewards/frontier_coverage_10": 0.10452535524964332,
|
||
|
|
"rewards/frontier_coverage_15": 0.10412113443017006,
|
||
|
|
"rewards/frontier_coverage_20": 0.10193085297942162,
|
||
|
|
"rewards/frontier_coverage_25": 0.09392795115709304,
|
||
|
|
"rewards/frontier_coverage_5": 0.10452535524964332,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.151483154296875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1968166172504425,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0757415771484375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0757415771484375,
|
||
|
|
"signal/advantage_abs_mean": 0.07838663980364799,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07838663980364799,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11850336343050002,
|
||
|
|
"signal/advantage_std": 0.11850336343050002,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1553162842988968,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.19515422284603118,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01553162857890129,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01553162857890129,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06829209327697754,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0883951410651207,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006829209346324206,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006829209346324206,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.004971844470128417,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.24396900236606597,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.30476749539375303,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034887567162513733,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034887567162513733,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.24396900236606597,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.30476749539375303,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034887567162513733,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034887567162513733,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.24396900236606597,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30476749539375303,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034887567162513733,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034887567162513733,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24291383922100068,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30346350073814393,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003473667986690998,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003473667986690998,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23362279534339905,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29205312132835387,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003340805834159255,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003340805834159255,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1957554578781128,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24552590250968934,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002799303038045764,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002799303038045764,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.24396900236606597,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.30476749539375303,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034887567162513733,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034887567162513733,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3504090600956439,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9551707663234101,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9138769951806276,
|
||
|
|
"calibration/confidence_entropy": 0.5300997472761269,
|
||
|
|
"calibration/coverage@0%": 0.001171875,
|
||
|
|
"calibration/coverage@1%": 0.001171875,
|
||
|
|
"calibration/coverage@10%": 0.001171875,
|
||
|
|
"calibration/coverage@15%": 0.027734375,
|
||
|
|
"calibration/coverage@20%": 0.101171875,
|
||
|
|
"calibration/coverage@25%": 0.199609375,
|
||
|
|
"calibration/coverage@30%": 0.33515625,
|
||
|
|
"calibration/coverage@5%": 0.001171875,
|
||
|
|
"calibration/ece": 0.106851484375,
|
||
|
|
"calibration/mean_confidence": 0.44491546875,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0,
|
||
|
|
"completions/max_length": 492.4,
|
||
|
|
"completions/max_terminated_length": 492.4,
|
||
|
|
"completions/mean_length": 169.7375,
|
||
|
|
"completions/mean_terminated_length": 169.7375,
|
||
|
|
"completions/min_length": 75.8,
|
||
|
|
"completions/min_terminated_length": 75.8,
|
||
|
|
"epoch": 0.192,
|
||
|
|
"grad_norm": 0.0010296102846041322,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0002,
|
||
|
|
"num_tokens": 198449246.0,
|
||
|
|
"reward": 0.915556812286377,
|
||
|
|
"reward_std": 0.09649229347705841,
|
||
|
|
"rewards/accuracy_reward": 0.48076171875,
|
||
|
|
"rewards/brier_reward": 0.7584408164024353,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9000682353973388,
|
||
|
|
"rewards/format_reward": 0.99970703125,
|
||
|
|
"rewards/frontier_coverage_0": 0.09730687141418456,
|
||
|
|
"rewards/frontier_coverage_1": 0.09730687141418456,
|
||
|
|
"rewards/frontier_coverage_10": 0.09755967259407043,
|
||
|
|
"rewards/frontier_coverage_15": 0.09740926474332809,
|
||
|
|
"rewards/frontier_coverage_20": 0.09416202008724213,
|
||
|
|
"rewards/frontier_coverage_25": 0.08129325956106186,
|
||
|
|
"rewards/frontier_coverage_5": 0.09730687141418456,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.134930419921875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.17711263298988342,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0674652099609375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0674652099609375,
|
||
|
|
"signal/advantage_abs_mean": 0.07492515221238136,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07492515221238136,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1148703083395958,
|
||
|
|
"signal/advantage_std": 0.1148703083395958,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1652140736579895,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.20634177327156067,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01652140785008669,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01652140785008669,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047895267605781555,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0618466705083847,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004789526853710413,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004789526853710413,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2331833630800247,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2929345488548279,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003334522061049938,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003334522061049938,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2331833630800247,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2929345488548279,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003334522061049938,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003334522061049938,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23196647465229034,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29145088195800783,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033171205781400205,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033171205781400205,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2315136820077896,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29087979793548585,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003310645651072264,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003310645651072264,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21936435103416443,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2758824825286865,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031369101721793412,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031369101721793412,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18077392876148224,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22790210247039794,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025850672274827955,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025850672274827955,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2331833630800247,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2929345488548279,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003334522061049938,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003334522061049938,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.30815469042987864,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9787380668407992,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9640332476588573,
|
||
|
|
"calibration/confidence_entropy": 0.5174168360083795,
|
||
|
|
"calibration/coverage@0%": 0.0031288221624266145,
|
||
|
|
"calibration/coverage@1%": 0.0031288221624266145,
|
||
|
|
"calibration/coverage@10%": 0.03282855308219178,
|
||
|
|
"calibration/coverage@15%": 0.14299474070450097,
|
||
|
|
"calibration/coverage@20%": 0.3109780149217221,
|
||
|
|
"calibration/coverage@25%": 0.43832788038160475,
|
||
|
|
"calibration/coverage@30%": 0.5622477372798435,
|
||
|
|
"calibration/coverage@5%": 0.0031288221624266145,
|
||
|
|
"calibration/ece": 0.15338373930208382,
|
||
|
|
"calibration/mean_confidence": 0.4815208716065659,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00029296875,
|
||
|
|
"completions/max_length": 477.6,
|
||
|
|
"completions/max_terminated_length": 477.6,
|
||
|
|
"completions/mean_length": 178.60751953125,
|
||
|
|
"completions/mean_terminated_length": 178.66034545898438,
|
||
|
|
"completions/min_length": 45.8,
|
||
|
|
"completions/min_terminated_length": 77.6,
|
||
|
|
"epoch": 0.208,
|
||
|
|
"grad_norm": 0.0010420128237456083,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0004,
|
||
|
|
"num_tokens": 215310411.0,
|
||
|
|
"reward": 0.9366827011108398,
|
||
|
|
"reward_std": 0.10523568391799927,
|
||
|
|
"rewards/accuracy_reward": 0.525,
|
||
|
|
"rewards/brier_reward": 0.7601073861122132,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.910399055480957,
|
||
|
|
"rewards/format_reward": 0.99951171875,
|
||
|
|
"rewards/frontier_coverage_0": 0.07539150714874268,
|
||
|
|
"rewards/frontier_coverage_1": 0.07539150714874268,
|
||
|
|
"rewards/frontier_coverage_10": 0.07539150714874268,
|
||
|
|
"rewards/frontier_coverage_15": 0.07539150714874268,
|
||
|
|
"rewards/frontier_coverage_20": 0.07489909082651139,
|
||
|
|
"rewards/frontier_coverage_25": 0.06396199613809586,
|
||
|
|
"rewards/frontier_coverage_5": 0.07539150714874268,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14537353515625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.194059419631958,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.072686767578125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.072686767578125,
|
||
|
|
"signal/advantage_abs_mean": 0.0802029699087143,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0802029699087143,
|
||
|
|
"signal/advantage_pre_scale_std": 0.12353497147560119,
|
||
|
|
"signal/advantage_std": 0.12353497147560119,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.17113058865070344,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.2146961957216263,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017113059386610986,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017113059386610986,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.042478848993778226,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05312614291906357,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0042478849180042745,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042478849180042745,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23045052886009215,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2944092571735382,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23045052886009215,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2944092571735382,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23045052886009215,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2944092571735382,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23045052886009215,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2944092571735382,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2168228805065155,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27752563655376433,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031005671713501214,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031005671713501214,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1687493294477463,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2179758220911026,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002413115510717034,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002413115510717034,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23045052886009215,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2944092571735382,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003295442508533597,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.33759827970691464,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9855316102239617,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9834165238932998,
|
||
|
|
"calibration/confidence_entropy": 0.48700275597752746,
|
||
|
|
"calibration/coverage@0%": 0.008609830062161851,
|
||
|
|
"calibration/coverage@1%": 0.008609830062161851,
|
||
|
|
"calibration/coverage@10%": 0.08637478236157477,
|
||
|
|
"calibration/coverage@15%": 0.1509052529402172,
|
||
|
|
"calibration/coverage@20%": 0.2587980183511761,
|
||
|
|
"calibration/coverage@25%": 0.3334923262969571,
|
||
|
|
"calibration/coverage@30%": 0.46375408896435283,
|
||
|
|
"calibration/coverage@5%": 0.02150045506216185,
|
||
|
|
"calibration/ece": 0.16715913862447945,
|
||
|
|
"calibration/mean_confidence": 0.4651282833566417,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 871.4,
|
||
|
|
"completions/max_terminated_length": 871.4,
|
||
|
|
"completions/mean_length": 182.4677734375,
|
||
|
|
"completions/mean_terminated_length": 182.62802734375,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 80.8,
|
||
|
|
"epoch": 0.224,
|
||
|
|
"grad_norm": 0.0008849430014379323,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0003,
|
||
|
|
"num_tokens": 232332065.0,
|
||
|
|
"reward": 0.9144545674324036,
|
||
|
|
"reward_std": 0.10017272233963012,
|
||
|
|
"rewards/accuracy_reward": 0.47197265625,
|
||
|
|
"rewards/brier_reward": 0.7604376077651978,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9159923315048217,
|
||
|
|
"rewards/format_reward": 0.99892578125,
|
||
|
|
"rewards/frontier_coverage_0": 0.1188489928841591,
|
||
|
|
"rewards/frontier_coverage_1": 0.1188489928841591,
|
||
|
|
"rewards/frontier_coverage_10": 0.1188489928841591,
|
||
|
|
"rewards/frontier_coverage_15": 0.1186547577381134,
|
||
|
|
"rewards/frontier_coverage_20": 0.11327697336673737,
|
||
|
|
"rewards/frontier_coverage_25": 0.08724054023623466,
|
||
|
|
"rewards/frontier_coverage_5": 0.1188489928841591,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.126641845703125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.16965427994728088,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.509375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0633209228515625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0633209228515625,
|
||
|
|
"signal/advantage_abs_mean": 0.07543377876281739,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07543377876281739,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11968540549278259,
|
||
|
|
"signal/advantage_std": 0.11968540549278259,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1779782146215439,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.22407877445220947,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017797821387648582,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017797821387648582,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04430801272392273,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.055218780785799025,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004430801328271628,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004430801328271628,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002069091796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005740390345454216,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22609589099884034,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2900040984153748,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032331712543964388,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032331712543964388,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22609589099884034,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2900040984153748,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032331712543964388,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032331712543964388,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22609589099884034,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2900040984153748,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032331712543964388,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032331712543964388,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2218073219060898,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2847987115383148,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003171844594180584,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003171844594180584,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2043167382478714,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2630573481321335,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029217293485999107,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029217293485999107,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15411110818386078,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20011564791202546,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002203788794577122,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002203788794577122,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22609589099884034,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2900040984153748,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032331712543964388,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032331712543964388,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.362449931517634,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9679039549098876,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9911512020196188,
|
||
|
|
"calibration/confidence_entropy": 0.4805309846266669,
|
||
|
|
"calibration/coverage@0%": 0.006640625,
|
||
|
|
"calibration/coverage@1%": 0.006640625,
|
||
|
|
"calibration/coverage@10%": 0.113671875,
|
||
|
|
"calibration/coverage@15%": 0.168359375,
|
||
|
|
"calibration/coverage@20%": 0.2296875,
|
||
|
|
"calibration/coverage@25%": 0.269921875,
|
||
|
|
"calibration/coverage@30%": 0.325390625,
|
||
|
|
"calibration/coverage@5%": 0.082421875,
|
||
|
|
"calibration/ece": 0.17709150255198303,
|
||
|
|
"calibration/mean_confidence": 0.4958924159529189,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00078125,
|
||
|
|
"completions/max_length": 655.4,
|
||
|
|
"completions/max_terminated_length": 655.4,
|
||
|
|
"completions/mean_length": 187.84072265625,
|
||
|
|
"completions/mean_terminated_length": 187.98792114257813,
|
||
|
|
"completions/min_length": 15.4,
|
||
|
|
"completions/min_terminated_length": 82.6,
|
||
|
|
"epoch": 0.24,
|
||
|
|
"grad_norm": 0.001173818134702742,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0001,
|
||
|
|
"num_tokens": 249507234.0,
|
||
|
|
"reward": 0.9425259709358216,
|
||
|
|
"reward_std": 0.10851940363645554,
|
||
|
|
"rewards/accuracy_reward": 0.5373046875,
|
||
|
|
"rewards/brier_reward": 0.7545185923576355,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.91850825548172,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.07295873025432228,
|
||
|
|
"rewards/frontier_coverage_1": 0.07295873025432228,
|
||
|
|
"rewards/frontier_coverage_10": 0.07295873025432228,
|
||
|
|
"rewards/frontier_coverage_15": 0.07312564663589001,
|
||
|
|
"rewards/frontier_coverage_20": 0.06827028058469295,
|
||
|
|
"rewards/frontier_coverage_25": 0.06042120754718781,
|
||
|
|
"rewards/frontier_coverage_5": 0.07295873025432228,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14693603515625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.19192145466804506,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.459375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.073468017578125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.073468017578125,
|
||
|
|
"signal/advantage_abs_mean": 0.0840824693441391,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0840824693441391,
|
||
|
|
"signal/advantage_pre_scale_std": 0.12857705354690552,
|
||
|
|
"signal/advantage_std": 0.12857705354690552,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.18759630620479584,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.2353689730167389,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018759630247950555,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018759630247950555,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047961297631263736,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05927042812108994,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0047961299307644365,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0047961299307644365,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005524271540343762,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2316052109003067,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3017585575580597,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0033119544852524994,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033119544852524994,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2316052109003067,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3017585575580597,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033119544852524994,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033119544852524994,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2316052109003067,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3017585575580597,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033119544852524994,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033119544852524994,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2291814088821411,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2985960841178894,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003277294151484966,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003277294151484966,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21456428468227387,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28007822036743163,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030682692769914864,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030682692769914864,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18277060687541963,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23978594839572906,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026136196684092283,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026136196684092283,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2316052109003067,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3017585575580597,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033119544852524994,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033119544852524994,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3081827301890542,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9624072780197552,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9915481436618189,
|
||
|
|
"calibration/confidence_entropy": 0.43016921259442914,
|
||
|
|
"calibration/coverage@0%": 0.018369312622309196,
|
||
|
|
"calibration/coverage@1%": 0.018369312622309196,
|
||
|
|
"calibration/coverage@10%": 0.09579409246575342,
|
||
|
|
"calibration/coverage@15%": 0.2114825403620352,
|
||
|
|
"calibration/coverage@20%": 0.3134922333659491,
|
||
|
|
"calibration/coverage@25%": 0.41742447407045014,
|
||
|
|
"calibration/coverage@30%": 0.5405080418297457,
|
||
|
|
"calibration/coverage@5%": 0.041806812622309196,
|
||
|
|
"calibration/ece": 0.13412265396494313,
|
||
|
|
"calibration/mean_confidence": 0.4988568931342644,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 672.0,
|
||
|
|
"completions/max_terminated_length": 672.0,
|
||
|
|
"completions/mean_length": 185.28642578125,
|
||
|
|
"completions/mean_terminated_length": 185.46622619628906,
|
||
|
|
"completions/min_length": 15.6,
|
||
|
|
"completions/min_terminated_length": 78.0,
|
||
|
|
"epoch": 0.256,
|
||
|
|
"grad_norm": 0.0011070192558690906,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0003,
|
||
|
|
"num_tokens": 266459383.0,
|
||
|
|
"reward": 0.9314518451690674,
|
||
|
|
"reward_std": 0.10366167277097701,
|
||
|
|
"rewards/accuracy_reward": 0.50625,
|
||
|
|
"rewards/brier_reward": 0.7604737877845764,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9207041382789611,
|
||
|
|
"rewards/format_reward": 0.99892578125,
|
||
|
|
"rewards/frontier_coverage_0": 0.11289391908794641,
|
||
|
|
"rewards/frontier_coverage_1": 0.11289391908794641,
|
||
|
|
"rewards/frontier_coverage_10": 0.11289391908794641,
|
||
|
|
"rewards/frontier_coverage_15": 0.1116494283080101,
|
||
|
|
"rewards/frontier_coverage_20": 0.10403509885072708,
|
||
|
|
"rewards/frontier_coverage_25": 0.08422165811061859,
|
||
|
|
"rewards/frontier_coverage_5": 0.11289391908794641,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13895263671875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.18133105635643004,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.069476318359375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.069476318359375,
|
||
|
|
"signal/advantage_abs_mean": 0.07949463427066802,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07949463427066802,
|
||
|
|
"signal/advantage_pre_scale_std": 0.12502660751342773,
|
||
|
|
"signal/advantage_std": 0.12502660751342773,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.18379815220832824,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.23144225180149078,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01837981529533863,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01837981529533863,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04970728680491447,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.061461112648248675,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004970728792250157,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004970728792250157,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002081298828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.006076698703691363,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23146614134311677,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.298116660118103,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003309965645894408,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003309965645894408,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23146614134311677,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.298116660118103,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003309965645894408,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003309965645894408,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23146614134311677,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.298116660118103,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003309965645894408,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003309965645894408,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22649968266487122,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29187222719192507,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003238945361226797,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003238945361226797,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21151622533798217,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2728983283042908,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030246819369494915,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030246819369494915,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16591953337192536,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21466890573501587,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002372649358585477,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002372649358585477,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23146614134311677,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.298116660118103,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003309965645894408,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003309965645894408,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.38003463125477943,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9735982384707729,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9822089271000805,
|
||
|
|
"calibration/confidence_entropy": 0.44327317264226096,
|
||
|
|
"calibration/coverage@0%": 0.010548403864970645,
|
||
|
|
"calibration/coverage@1%": 0.010548403864970645,
|
||
|
|
"calibration/coverage@10%": 0.09376911081213307,
|
||
|
|
"calibration/coverage@15%": 0.12697223581213307,
|
||
|
|
"calibration/coverage@20%": 0.14298786081213308,
|
||
|
|
"calibration/coverage@25%": 0.21884555406066536,
|
||
|
|
"calibration/coverage@30%": 0.2950342465753425,
|
||
|
|
"calibration/coverage@5%": 0.024610903864970647,
|
||
|
|
"calibration/ece": 0.16382442094629304,
|
||
|
|
"calibration/mean_confidence": 0.46472179519412593,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0005859375,
|
||
|
|
"completions/max_length": 708.2,
|
||
|
|
"completions/max_terminated_length": 708.2,
|
||
|
|
"completions/mean_length": 192.935546875,
|
||
|
|
"completions/mean_terminated_length": 193.04912719726562,
|
||
|
|
"completions/min_length": 15.4,
|
||
|
|
"completions/min_terminated_length": 80.2,
|
||
|
|
"epoch": 0.272,
|
||
|
|
"grad_norm": 0.0011620813747867942,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 283400739.0,
|
||
|
|
"reward": 0.9291788816452027,
|
||
|
|
"reward_std": 0.09849026650190354,
|
||
|
|
"rewards/accuracy_reward": 0.49697265625,
|
||
|
|
"rewards/brier_reward": 0.7616158962249756,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9286294102668762,
|
||
|
|
"rewards/format_reward": 0.9994140625,
|
||
|
|
"rewards/frontier_coverage_0": 0.12657882794737815,
|
||
|
|
"rewards/frontier_coverage_1": 0.12657882794737815,
|
||
|
|
"rewards/frontier_coverage_10": 0.1257509134709835,
|
||
|
|
"rewards/frontier_coverage_15": 0.1237585011869669,
|
||
|
|
"rewards/frontier_coverage_20": 0.11603162102401257,
|
||
|
|
"rewards/frontier_coverage_25": 0.09115448929369449,
|
||
|
|
"rewards/frontier_coverage_5": 0.12657882794737815,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.133734130859375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.17143784165382386,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0668670654296875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0668670654296875,
|
||
|
|
"signal/advantage_abs_mean": 0.07669400870800018,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07669400870800018,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11901784390211105,
|
||
|
|
"signal/advantage_std": 0.11901784390211105,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.18369872272014617,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.23071206510066986,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018369871750473978,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018369871750473978,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04494693949818611,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05543198511004448,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004494693968445063,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004494693968445063,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.24683951139450072,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3129281342029572,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035298048984259366,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035298048984259366,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.24683951139450072,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3129281342029572,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035298048984259366,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035298048984259366,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.24551962316036224,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3112900614738464,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035109306219965218,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035109306219965218,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24138884544372557,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30613497495651243,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034518604166805743,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034518604166805743,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2232038915157318,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2835902810096741,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031918155495077372,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031918155495077372,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17182688117027284,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21931109428405762,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024571243207901715,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024571243207901715,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.24683951139450072,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3129281342029572,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035298048984259366,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035298048984259366,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.33664580545577377,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9753172988887358,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9797373831788441,
|
||
|
|
"calibration/confidence_entropy": 0.4395737894823409,
|
||
|
|
"calibration/coverage@0%": 0.004303754892367906,
|
||
|
|
"calibration/coverage@1%": 0.004303754892367906,
|
||
|
|
"calibration/coverage@10%": 0.046573813600782776,
|
||
|
|
"calibration/coverage@15%": 0.105613227739726,
|
||
|
|
"calibration/coverage@20%": 0.20176201687866926,
|
||
|
|
"calibration/coverage@25%": 0.24827773361056754,
|
||
|
|
"calibration/coverage@30%": 0.36471761863992175,
|
||
|
|
"calibration/coverage@5%": 0.020742111056751465,
|
||
|
|
"calibration/ece": 0.13120126033724472,
|
||
|
|
"calibration/mean_confidence": 0.48288364484083246,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00078125,
|
||
|
|
"completions/max_length": 612.4,
|
||
|
|
"completions/max_terminated_length": 612.4,
|
||
|
|
"completions/mean_length": 186.45693359375,
|
||
|
|
"completions/mean_terminated_length": 186.6018280029297,
|
||
|
|
"completions/min_length": 13.4,
|
||
|
|
"completions/min_terminated_length": 81.0,
|
||
|
|
"epoch": 0.288,
|
||
|
|
"grad_norm": 0.0010369827505201101,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0003,
|
||
|
|
"num_tokens": 300268234.0,
|
||
|
|
"reward": 0.9325536012649536,
|
||
|
|
"reward_std": 0.09712951034307479,
|
||
|
|
"rewards/accuracy_reward": 0.50712890625,
|
||
|
|
"rewards/brier_reward": 0.7498034596443176,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9337062835693359,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.11477768123149872,
|
||
|
|
"rewards/frontier_coverage_1": 0.11477768123149872,
|
||
|
|
"rewards/frontier_coverage_10": 0.11409241706132889,
|
||
|
|
"rewards/frontier_coverage_15": 0.11274639219045639,
|
||
|
|
"rewards/frontier_coverage_20": 0.11127715855836869,
|
||
|
|
"rewards/frontier_coverage_25": 0.09562420099973679,
|
||
|
|
"rewards/frontier_coverage_5": 0.11477768123149872,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.133929443359375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1750246465206146,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0669647216796875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0669647216796875,
|
||
|
|
"signal/advantage_abs_mean": 0.07482761144638062,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07482761144638062,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11771952509880065,
|
||
|
|
"signal/advantage_std": 0.11771952509880065,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.18906202912330627,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.23541579842567445,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01890620365738869,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01890620365738869,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04171065092086792,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.053112023323774335,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004171065147966146,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004171065147966146,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.00552427158690989,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2531146973371506,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3211081326007843,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003619540063664317,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003619540063664317,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2531146973371506,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3211081326007843,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003619540063664317,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003619540063664317,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2518859803676605,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.31956766843795775,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036019694991409777,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036019694991409777,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24905660152435302,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3160142481327057,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003561509447172284,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003561509447172284,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2441376507282257,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.30990801453590394,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034911684226244686,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034911684226244686,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19897768795490264,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2540053725242615,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028453809674829243,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028453809674829243,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2531146973371506,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3211081326007843,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003619540063664317,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003619540063664317,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3024819927679216,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9705055827465557,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9820260193634669,
|
||
|
|
"calibration/confidence_entropy": 0.43656159370883146,
|
||
|
|
"calibration/coverage@0%": 0.001171875,
|
||
|
|
"calibration/coverage@1%": 0.001171875,
|
||
|
|
"calibration/coverage@10%": 0.070703125,
|
||
|
|
"calibration/coverage@15%": 0.12421875,
|
||
|
|
"calibration/coverage@20%": 0.28102067025440314,
|
||
|
|
"calibration/coverage@25%": 0.39635824363992167,
|
||
|
|
"calibration/coverage@30%": 0.5680765349804305,
|
||
|
|
"calibration/coverage@5%": 0.001171875,
|
||
|
|
"calibration/ece": 0.14709306829226074,
|
||
|
|
"calibration/mean_confidence": 0.4818064076076837,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00068359375,
|
||
|
|
"completions/max_length": 758.6,
|
||
|
|
"completions/max_terminated_length": 758.6,
|
||
|
|
"completions/mean_length": 186.2091796875,
|
||
|
|
"completions/mean_terminated_length": 186.3367950439453,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 83.0,
|
||
|
|
"epoch": 0.304,
|
||
|
|
"grad_norm": 0.0009753642370924354,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0,
|
||
|
|
"num_tokens": 317104968.0,
|
||
|
|
"reward": 0.9321272730827331,
|
||
|
|
"reward_std": 0.09373017102479934,
|
||
|
|
"rewards/accuracy_reward": 0.505859375,
|
||
|
|
"rewards/brier_reward": 0.7468447089195251,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9352753400802613,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.11779828369617462,
|
||
|
|
"rewards/frontier_coverage_1": 0.11779828369617462,
|
||
|
|
"rewards/frontier_coverage_10": 0.11779828369617462,
|
||
|
|
"rewards/frontier_coverage_15": 0.11733001321554185,
|
||
|
|
"rewards/frontier_coverage_20": 0.11368912011384964,
|
||
|
|
"rewards/frontier_coverage_25": 0.10015616714954376,
|
||
|
|
"rewards/frontier_coverage_5": 0.11779828369617462,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.128564453125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.16599983870983123,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0642822265625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0642822265625,
|
||
|
|
"signal/advantage_abs_mean": 0.07163036465644837,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07163036465644837,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11185246258974076,
|
||
|
|
"signal/advantage_std": 0.11185246258974076,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1896709680557251,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.2372230350971222,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018967097997665404,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018967097997665404,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03978384882211685,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05163332596421242,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003978384891524911,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003978384891524911,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005524271633476019,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2604574590921402,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.32943272590637207,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037245417013764383,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037245417013764383,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2604574590921402,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.32943272590637207,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037245417013764383,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037245417013764383,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2604574590921402,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.32943272590637207,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037245417013764383,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037245417013764383,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2594649285078049,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.32818403244018557,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037103486247360706,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037103486247360706,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.24546484351158143,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.31088122725486755,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00351014737971127,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00351014737971127,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20276572704315185,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2578410357236862,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028995498549193146,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028995498549193146,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2604574590921402,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.32943272590637207,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037245417013764383,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037245417013764383,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2584915549059722,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9643361149508778,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9796284846798822,
|
||
|
|
"calibration/confidence_entropy": 0.4309896924466544,
|
||
|
|
"calibration/coverage@0%": 0.01484375,
|
||
|
|
"calibration/coverage@1%": 0.01484375,
|
||
|
|
"calibration/coverage@10%": 0.14946997549019608,
|
||
|
|
"calibration/coverage@15%": 0.3005147058823529,
|
||
|
|
"calibration/coverage@20%": 0.44199754901960786,
|
||
|
|
"calibration/coverage@25%": 0.5163174019607844,
|
||
|
|
"calibration/coverage@30%": 0.653125,
|
||
|
|
"calibration/coverage@5%": 0.046875,
|
||
|
|
"calibration/ece": 0.16068788980220403,
|
||
|
|
"calibration/mean_confidence": 0.5118728329765949,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00068359375,
|
||
|
|
"completions/max_length": 537.4,
|
||
|
|
"completions/max_terminated_length": 537.4,
|
||
|
|
"completions/mean_length": 185.09150390625,
|
||
|
|
"completions/mean_terminated_length": 185.21853942871093,
|
||
|
|
"completions/min_length": 32.8,
|
||
|
|
"completions/min_terminated_length": 85.2,
|
||
|
|
"epoch": 0.32,
|
||
|
|
"grad_norm": 0.000842822715640068,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0001,
|
||
|
|
"num_tokens": 334089009.0,
|
||
|
|
"reward": 0.9449261665344239,
|
||
|
|
"reward_std": 0.08346841335296631,
|
||
|
|
"rewards/accuracy_reward": 0.5263671875,
|
||
|
|
"rewards/brier_reward": 0.7642805576324463,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9409352779388428,
|
||
|
|
"rewards/format_reward": 0.99931640625,
|
||
|
|
"rewards/frontier_coverage_0": 0.11808980870991945,
|
||
|
|
"rewards/frontier_coverage_1": 0.11808980870991945,
|
||
|
|
"rewards/frontier_coverage_10": 0.11808980870991945,
|
||
|
|
"rewards/frontier_coverage_15": 0.11808980870991945,
|
||
|
|
"rewards/frontier_coverage_20": 0.11667817845009268,
|
||
|
|
"rewards/frontier_coverage_25": 0.1014600930036977,
|
||
|
|
"rewards/frontier_coverage_5": 0.11808980870991945,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09598388671875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.13364278078079223,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047991943359375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047991943359375,
|
||
|
|
"signal/advantage_abs_mean": 0.06248387470841408,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06248387470841408,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10356017798185349,
|
||
|
|
"signal/advantage_std": 0.10356017798185349,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1814287006855011,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.22994103133678437,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0181428711861372,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0181428711861372,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036495928466320035,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.046190590411424634,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036495930049568414,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036495930049568414,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001300048828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.003194373194128275,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22864521145820618,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.29235140681266786,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22864521145820618,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29235140681266786,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22864521145820618,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29235140681266786,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22864521145820618,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29235140681266786,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21406340301036836,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2742093026638031,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003061106661334634,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003061106661334634,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17238016426563263,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22113934755325318,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00246503627859056,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00246503627859056,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22864521145820618,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29235140681266786,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032696264795958998,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32,
|
||
|
|
"eval_calibration/aurc": 0.4978971802283725,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.8601270992336276,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.9743978817320665,
|
||
|
|
"eval_calibration/confidence_entropy": 0.383007351327093,
|
||
|
|
"eval_calibration/coverage@0%": 0.046875,
|
||
|
|
"eval_calibration/coverage@1%": 0.046875,
|
||
|
|
"eval_calibration/coverage@10%": 0.046875,
|
||
|
|
"eval_calibration/coverage@15%": 0.046875,
|
||
|
|
"eval_calibration/coverage@20%": 0.0546875,
|
||
|
|
"eval_calibration/coverage@25%": 0.1640625,
|
||
|
|
"eval_calibration/coverage@30%": 0.171875,
|
||
|
|
"eval_calibration/coverage@5%": 0.046875,
|
||
|
|
"eval_calibration/ece": 0.2747890625,
|
||
|
|
"eval_calibration/mean_confidence": 0.4455234375,
|
||
|
|
"eval_completions/clipped_ratio": 0.0,
|
||
|
|
"eval_completions/max_length": 316.25,
|
||
|
|
"eval_completions/max_terminated_length": 316.25,
|
||
|
|
"eval_completions/mean_length": 186.5537452697754,
|
||
|
|
"eval_completions/mean_terminated_length": 186.5537452697754,
|
||
|
|
"eval_completions/min_length": 94.75,
|
||
|
|
"eval_completions/min_terminated_length": 94.75,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 334089009.0,
|
||
|
|
"eval_reward": 0.8891656398773193,
|
||
|
|
"eval_reward_std": 0.21917415410280228,
|
||
|
|
"eval_rewards/accuracy_reward": 0.41015625,
|
||
|
|
"eval_rewards/brier_reward": 0.7693117260932922,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.882080078125,
|
||
|
|
"eval_rewards/format_reward": 1.0,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.20451999828219414,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.20451999828219414,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.20451999828219414,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.20451999828219414,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.17699695751070976,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.1254621297121048,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.20451999828219414,
|
||
|
|
"eval_runtime": 17.6698,
|
||
|
|
"eval_samples_per_second": 28.297,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.46484375,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4892386645078659,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.232421875,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.232421875,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.2005120925605297,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2005120925605297,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.2169787734746933,
|
||
|
|
"eval_signal/advantage_std": 0.2169787734746933,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.23788422346115112,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.2953747808933258,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023788423743098974,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.023788423743098974,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0579986572265625,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06986325047910213,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0057998658157885075,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0057998658157885075,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.4016883596777916,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.5005353167653084,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.4016883596777916,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.5005353167653084,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.4016883596777916,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.5005353167653084,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.4016883596777916,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.5005353167653084,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3575369715690613,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.44863685965538025,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005112778628244996,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005112778628244996,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2680119276046753,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.33913008868694305,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038325704517774284,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038325704517774284,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.4016883596777916,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.5005353167653084,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005744143738411367,
|
||
|
|
"eval_steps_per_second": 0.226,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3131342794731158,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9569328620971553,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9708789338570618,
|
||
|
|
"calibration/confidence_entropy": 0.43368021237880383,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.05703125,
|
||
|
|
"calibration/coverage@15%": 0.078125,
|
||
|
|
"calibration/coverage@20%": 0.15157396648727986,
|
||
|
|
"calibration/coverage@25%": 0.3711403803816047,
|
||
|
|
"calibration/coverage@30%": 0.5071214530332682,
|
||
|
|
"calibration/coverage@5%": 0.01640625,
|
||
|
|
"calibration/ece": 0.15830718956356565,
|
||
|
|
"calibration/mean_confidence": 0.46587580069748835,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 623.4,
|
||
|
|
"completions/max_terminated_length": 623.4,
|
||
|
|
"completions/mean_length": 188.03359375,
|
||
|
|
"completions/mean_terminated_length": 188.19609375,
|
||
|
|
"completions/min_length": 16.6,
|
||
|
|
"completions/min_terminated_length": 84.6,
|
||
|
|
"epoch": 0.336,
|
||
|
|
"grad_norm": 0.0009528554510325193,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0006,
|
||
|
|
"num_tokens": 350736905.0,
|
||
|
|
"reward": 0.9469254612922668,
|
||
|
|
"reward_std": 0.09120655208826065,
|
||
|
|
"rewards/accuracy_reward": 0.53369140625,
|
||
|
|
"rewards/brier_reward": 0.7606370687484741,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9452253580093384,
|
||
|
|
"rewards/format_reward": 0.99912109375,
|
||
|
|
"rewards/frontier_coverage_0": 0.10427642688155174,
|
||
|
|
"rewards/frontier_coverage_1": 0.10427642688155174,
|
||
|
|
"rewards/frontier_coverage_10": 0.10427642688155174,
|
||
|
|
"rewards/frontier_coverage_15": 0.1033320739865303,
|
||
|
|
"rewards/frontier_coverage_20": 0.09279508143663406,
|
||
|
|
"rewards/frontier_coverage_25": 0.08138205781579018,
|
||
|
|
"rewards/frontier_coverage_5": 0.10427642688155174,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.112249755859375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1554201140999794,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0561248779296875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0561248779296875,
|
||
|
|
"signal/advantage_abs_mean": 0.06810541898012161,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06810541898012161,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11220057159662247,
|
||
|
|
"signal/advantage_std": 0.11220057159662247,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1775924503803253,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.224722757935524,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01775924488902092,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01775924488902092,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03135449551045895,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04070526883006096,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031354496255517005,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031354496255517005,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001690673828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.00463553611189127,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22398524582386017,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.28823475241661073,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003202988859266043,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003202988859266043,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22398524582386017,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28823475241661073,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003202988859266043,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003202988859266043,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22398524582386017,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28823475241661073,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003202988859266043,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003202988859266043,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21530932188034058,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27726990580558775,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030789232812821867,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030789232812821867,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18621681928634642,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24037640988826753,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026629004627466203,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026629004627466203,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14478266537189483,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18697050213813782,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020703921094536782,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020703921094536782,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22398524582386017,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28823475241661073,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003202988859266043,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003202988859266043,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.33687739417794316,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9215418352994096,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9673654878997088,
|
||
|
|
"calibration/confidence_entropy": 0.3971975919538019,
|
||
|
|
"calibration/coverage@0%": 0.019930283757338553,
|
||
|
|
"calibration/coverage@1%": 0.019930283757338553,
|
||
|
|
"calibration/coverage@10%": 0.08017062133072407,
|
||
|
|
"calibration/coverage@15%": 0.2220523483365949,
|
||
|
|
"calibration/coverage@20%": 0.33498425269080234,
|
||
|
|
"calibration/coverage@25%": 0.39986240215264185,
|
||
|
|
"calibration/coverage@30%": 0.45926109955968686,
|
||
|
|
"calibration/coverage@5%": 0.043002385029354204,
|
||
|
|
"calibration/ece": 0.1328964998265568,
|
||
|
|
"calibration/mean_confidence": 0.4276185512309817,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 622.0,
|
||
|
|
"completions/max_terminated_length": 622.0,
|
||
|
|
"completions/mean_length": 192.4009765625,
|
||
|
|
"completions/mean_terminated_length": 192.57146911621095,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 86.8,
|
||
|
|
"epoch": 0.352,
|
||
|
|
"grad_norm": 0.0010305993491783738,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0,
|
||
|
|
"num_tokens": 367967507.0,
|
||
|
|
"reward": 0.9195515155792237,
|
||
|
|
"reward_std": 0.09088908433914185,
|
||
|
|
"rewards/accuracy_reward": 0.46865234375,
|
||
|
|
"rewards/brier_reward": 0.7680434465408326,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9434323787689209,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.1578772708773613,
|
||
|
|
"rewards/frontier_coverage_1": 0.1578772708773613,
|
||
|
|
"rewards/frontier_coverage_10": 0.15736701637506484,
|
||
|
|
"rewards/frontier_coverage_15": 0.1525440275669098,
|
||
|
|
"rewards/frontier_coverage_20": 0.133266481757164,
|
||
|
|
"rewards/frontier_coverage_25": 0.10179407596588134,
|
||
|
|
"rewards/frontier_coverage_5": 0.1578772708773613,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.118914794921875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.15495410561561584,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.565625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0594573974609375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0594573974609375,
|
||
|
|
"signal/advantage_abs_mean": 0.0692843958735466,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0692843958735466,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11363352984189987,
|
||
|
|
"signal/advantage_std": 0.11363352984189987,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.17312630116939545,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.21913830041885377,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017312630265951156,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017312630265951156,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03259267956018448,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04234115481376648,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032592680305242538,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032592680305242538,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00186767578125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.00485165468417108,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000933837890625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22767443656921388,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.29212393164634703,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003255744371563196,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003255744371563196,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22767443656921388,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29212393164634703,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003255744371563196,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003255744371563196,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22719871401786804,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2915296196937561,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032489415258169173,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032489415258169173,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2192291349172592,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2817227363586426,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003134976560249925,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003134976560249925,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1940439224243164,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24981018900871277,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002774828253313899,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002774828253313899,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14172750413417817,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1828421801328659,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002026703301817179,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002026703301817179,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22767443656921388,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29212393164634703,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003255744371563196,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003255744371563196,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.38035904158057876,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9363451585140791,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9557963506573927,
|
||
|
|
"calibration/confidence_entropy": 0.41220067846801395,
|
||
|
|
"calibration/coverage@0%": 0.002734375,
|
||
|
|
"calibration/coverage@1%": 0.002734375,
|
||
|
|
"calibration/coverage@10%": 0.01015625,
|
||
|
|
"calibration/coverage@15%": 0.115625,
|
||
|
|
"calibration/coverage@20%": 0.233203125,
|
||
|
|
"calibration/coverage@25%": 0.303125,
|
||
|
|
"calibration/coverage@30%": 0.346875,
|
||
|
|
"calibration/coverage@5%": 0.002734375,
|
||
|
|
"calibration/ece": 0.14852497762388386,
|
||
|
|
"calibration/mean_confidence": 0.5069563311666597,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00078125,
|
||
|
|
"completions/max_length": 635.8,
|
||
|
|
"completions/max_terminated_length": 635.8,
|
||
|
|
"completions/mean_length": 195.91884765625,
|
||
|
|
"completions/mean_terminated_length": 196.0728973388672,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 85.0,
|
||
|
|
"epoch": 0.368,
|
||
|
|
"grad_norm": 0.001282665878534317,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0006,
|
||
|
|
"num_tokens": 385039188.0,
|
||
|
|
"reward": 0.9295849800109863,
|
||
|
|
"reward_std": 0.08723250329494477,
|
||
|
|
"rewards/accuracy_reward": 0.4876953125,
|
||
|
|
"rewards/brier_reward": 0.7701516270637512,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9463400363922119,
|
||
|
|
"rewards/format_reward": 0.99912109375,
|
||
|
|
"rewards/frontier_coverage_0": 0.15154111981391907,
|
||
|
|
"rewards/frontier_coverage_1": 0.15154111981391907,
|
||
|
|
"rewards/frontier_coverage_10": 0.15106455981731415,
|
||
|
|
"rewards/frontier_coverage_15": 0.14973250329494475,
|
||
|
|
"rewards/frontier_coverage_20": 0.14436569213867187,
|
||
|
|
"rewards/frontier_coverage_25": 0.11632102131843566,
|
||
|
|
"rewards/frontier_coverage_5": 0.15135019719600679,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10582275390625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.14048746675252916,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.59375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052911376953125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052911376953125,
|
||
|
|
"signal/advantage_abs_mean": 0.06566806137561798,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06566806137561798,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11026655286550521,
|
||
|
|
"signal/advantage_std": 0.11026655286550521,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1696721464395523,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.21770406663417816,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01696721464395523,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01696721464395523,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03083474263548851,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04060439914464951,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030834743287414313,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030834743287414313,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.004971844423562288,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20986934006214142,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2717552125453949,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00300113158300519,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00300113158300519,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20986934006214142,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2717552125453949,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00300113158300519,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00300113158300519,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20876844227313995,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.270385617017746,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002985388785600662,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002985388785600662,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2069230079650879,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26806753873825073,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002958999015390873,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002958999015390873,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19959359467029572,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2588253915309906,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028541883453726768,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028541883453726768,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16081069707870482,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20880703628063202,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022995929699391126,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022995929699391126,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20957548320293426,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27138410210609437,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029969294089823963,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029969294089823963,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3342946944021966,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9207722984951203,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9459169470031471,
|
||
|
|
"calibration/confidence_entropy": 0.39206068388821536,
|
||
|
|
"calibration/coverage@0%": 0.010560652842196094,
|
||
|
|
"calibration/coverage@1%": 0.010560652842196094,
|
||
|
|
"calibration/coverage@10%": 0.17484713156039572,
|
||
|
|
"calibration/coverage@15%": 0.2123777088598086,
|
||
|
|
"calibration/coverage@20%": 0.27841626683436826,
|
||
|
|
"calibration/coverage@25%": 0.3401617719713546,
|
||
|
|
"calibration/coverage@30%": 0.3886329070007088,
|
||
|
|
"calibration/coverage@5%": 0.09822347696156987,
|
||
|
|
"calibration/ece": 0.15159175750225923,
|
||
|
|
"calibration/mean_confidence": 0.4581007046100899,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00107421875,
|
||
|
|
"completions/max_length": 785.4,
|
||
|
|
"completions/max_terminated_length": 785.4,
|
||
|
|
"completions/mean_length": 192.38779296875,
|
||
|
|
"completions/mean_terminated_length": 192.59400329589843,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 88.4,
|
||
|
|
"epoch": 0.384,
|
||
|
|
"grad_norm": 0.0008075654623098671,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0006,
|
||
|
|
"num_tokens": 401865751.0,
|
||
|
|
"reward": 0.948432457447052,
|
||
|
|
"reward_std": 0.08624267876148224,
|
||
|
|
"rewards/accuracy_reward": 0.525,
|
||
|
|
"rewards/brier_reward": 0.7833073854446411,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9480945825576782,
|
||
|
|
"rewards/format_reward": 0.99892578125,
|
||
|
|
"rewards/frontier_coverage_0": 0.13540833443403244,
|
||
|
|
"rewards/frontier_coverage_1": 0.13540833443403244,
|
||
|
|
"rewards/frontier_coverage_10": 0.13540833443403244,
|
||
|
|
"rewards/frontier_coverage_15": 0.13456740379333496,
|
||
|
|
"rewards/frontier_coverage_20": 0.1337219536304474,
|
||
|
|
"rewards/frontier_coverage_25": 0.12220003008842469,
|
||
|
|
"rewards/frontier_coverage_5": 0.13540833443403244,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10770263671875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.14466220736503602,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053851318359375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.053851318359375,
|
||
|
|
"signal/advantage_abs_mean": 0.06431769207119942,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06431769207119942,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10962674617767335,
|
||
|
|
"signal/advantage_std": 0.10962674617767335,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.15990141928195953,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.2052460253238678,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01599014215171337,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01599014215171337,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02844097763299942,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03805322200059891,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028440977446734907,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028440977446734907,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002044677734375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005344869010150433,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010223388671875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010223388671875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20318333506584169,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.262479567527771,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029055217746645212,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029055217746645212,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20318333506584169,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.262479567527771,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029055217746645212,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029055217746645212,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20318333506584169,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.262479567527771,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029055217746645212,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029055217746645212,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2018148720264435,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26077154874801634,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028859527315944432,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028859527315944432,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19852499961853026,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2565582513809204,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002838907530531287,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002838907530531287,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1744537502527237,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22632047533988953,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002494688564911485,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002494688564911485,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20318333506584169,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.262479567527771,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029055217746645212,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029055217746645212,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.44081405388229433,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9532446689776313,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9478532000242842,
|
||
|
|
"calibration/confidence_entropy": 0.42983333782589755,
|
||
|
|
"calibration/coverage@0%": 0.005085031925343811,
|
||
|
|
"calibration/coverage@1%": 0.005085031925343811,
|
||
|
|
"calibration/coverage@10%": 0.01055378192534381,
|
||
|
|
"calibration/coverage@15%": 0.013288156925343811,
|
||
|
|
"calibration/coverage@20%": 0.033607563850687625,
|
||
|
|
"calibration/coverage@25%": 0.06876381385068761,
|
||
|
|
"calibration/coverage@30%": 0.1543106888506876,
|
||
|
|
"calibration/coverage@5%": 0.005085031925343811,
|
||
|
|
"calibration/ece": 0.20910516439126764,
|
||
|
|
"calibration/mean_confidence": 0.5139065532992861,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00068359375,
|
||
|
|
"completions/max_length": 694.2,
|
||
|
|
"completions/max_terminated_length": 694.2,
|
||
|
|
"completions/mean_length": 192.7953125,
|
||
|
|
"completions/mean_terminated_length": 192.92990112304688,
|
||
|
|
"completions/min_length": 37.4,
|
||
|
|
"completions/min_terminated_length": 93.2,
|
||
|
|
"epoch": 0.4,
|
||
|
|
"grad_norm": 0.0009383531287312508,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0004,
|
||
|
|
"num_tokens": 418876423.0,
|
||
|
|
"reward": 0.9311830878257752,
|
||
|
|
"reward_std": 0.09381079226732254,
|
||
|
|
"rewards/accuracy_reward": 0.4966796875,
|
||
|
|
"rewards/brier_reward": 0.758125364780426,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9506930947303772,
|
||
|
|
"rewards/format_reward": 0.99931640625,
|
||
|
|
"rewards/frontier_coverage_0": 0.1270300518721342,
|
||
|
|
"rewards/frontier_coverage_1": 0.1270300518721342,
|
||
|
|
"rewards/frontier_coverage_10": 0.12690381668508052,
|
||
|
|
"rewards/frontier_coverage_15": 0.12644278071820736,
|
||
|
|
"rewards/frontier_coverage_20": 0.12001086957752705,
|
||
|
|
"rewards/frontier_coverage_25": 0.10591514781117439,
|
||
|
|
"rewards/frontier_coverage_5": 0.1270300518721342,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1243408203125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.16100817918777466,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06217041015625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06217041015625,
|
||
|
|
"signal/advantage_abs_mean": 0.07223667949438095,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07223667949438095,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11894866228103637,
|
||
|
|
"signal/advantage_std": 0.11894866228103637,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.17008011043071747,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.2154710829257965,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017008011415600776,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017008011415600776,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02668723650276661,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03516421280801296,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002668723603710532,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002668723603710532,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
||
|
|
"signal/format_reward/group_std_mean": 0.003866990189999342,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20345945060253143,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2629883736371994,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002909470163285732,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002909470163285732,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20345945060253143,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2629883736371994,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002909470163285732,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002909470163285732,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20321880877017975,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26268347799777986,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029060290195047855,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029060290195047855,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2025788426399231,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26185826659202577,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028968773782253265,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028968773782253265,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1929622620344162,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24999802708625793,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027593603823333978,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027593603823333978,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16784164309501648,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2181039869785309,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002400135388597846,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002400135388597846,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20345945060253143,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2629883736371994,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002909470163285732,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002909470163285732,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.34096016807284873,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9683422027307792,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9644673694109379,
|
||
|
|
"calibration/confidence_entropy": 0.45812805550221186,
|
||
|
|
"calibration/coverage@0%": 0.0015655577299412914,
|
||
|
|
"calibration/coverage@1%": 0.0015655577299412914,
|
||
|
|
"calibration/coverage@10%": 0.0015655577299412914,
|
||
|
|
"calibration/coverage@15%": 0.010958904109589041,
|
||
|
|
"calibration/coverage@20%": 0.06301369863013698,
|
||
|
|
"calibration/coverage@25%": 0.15577299412915852,
|
||
|
|
"calibration/coverage@30%": 0.3874610139432485,
|
||
|
|
"calibration/coverage@5%": 0.0015655577299412914,
|
||
|
|
"calibration/ece": 0.12162173186513561,
|
||
|
|
"calibration/mean_confidence": 0.5221142927131631,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 706.6,
|
||
|
|
"completions/max_terminated_length": 706.6,
|
||
|
|
"completions/mean_length": 193.1140625,
|
||
|
|
"completions/mean_terminated_length": 193.30492248535157,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 89.2,
|
||
|
|
"epoch": 0.416,
|
||
|
|
"grad_norm": 0.000847649818751961,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0006,
|
||
|
|
"num_tokens": 435735095.0,
|
||
|
|
"reward": 0.9387712836265564,
|
||
|
|
"reward_std": 0.09340896159410476,
|
||
|
|
"rewards/accuracy_reward": 0.5076171875,
|
||
|
|
"rewards/brier_reward": 0.7740776777267456,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9541897296905517,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.13038937300443648,
|
||
|
|
"rewards/frontier_coverage_1": 0.13038937300443648,
|
||
|
|
"rewards/frontier_coverage_10": 0.13038937300443648,
|
||
|
|
"rewards/frontier_coverage_15": 0.12946571856737138,
|
||
|
|
"rewards/frontier_coverage_20": 0.12540235221385956,
|
||
|
|
"rewards/frontier_coverage_25": 0.10638794153928757,
|
||
|
|
"rewards/frontier_coverage_5": 0.13038937300443648,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1222900390625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1587248280644417,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06114501953125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06114501953125,
|
||
|
|
"signal/advantage_abs_mean": 0.07159682959318162,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.07159682959318162,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11725033968687057,
|
||
|
|
"signal/advantage_std": 0.11725033968687057,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1667701780796051,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.2120150715112686,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016677017882466318,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016677017882466318,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02338048294186592,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.031994204968214035,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023380483500659464,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023380483500659464,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005524271540343762,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2134171187877655,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.27234098613262175,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003051864681765437,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003051864681765437,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2134171187877655,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27234098613262175,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003051864681765437,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003051864681765437,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2134171187877655,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27234098613262175,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003051864681765437,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003051864681765437,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21203236877918244,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2705941587686539,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003032062901183963,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003032062901183963,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20489224493503572,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26140871942043303,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002929958933964372,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002929958933964372,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17466167509555816,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2234892874956131,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002497661951929331,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002497661951929331,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2134171187877655,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27234098613262175,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003051864681765437,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003051864681765437,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.28100625365213594,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9536777814014286,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9760632185498187,
|
||
|
|
"calibration/confidence_entropy": 0.42176649646530473,
|
||
|
|
"calibration/coverage@0%": 0.003131115459882583,
|
||
|
|
"calibration/coverage@1%": 0.003131115459882583,
|
||
|
|
"calibration/coverage@10%": 0.07123287671232877,
|
||
|
|
"calibration/coverage@15%": 0.18718698438279419,
|
||
|
|
"calibration/coverage@20%": 0.3110179962395917,
|
||
|
|
"calibration/coverage@25%": 0.39481524116495914,
|
||
|
|
"calibration/coverage@30%": 0.586499912464986,
|
||
|
|
"calibration/coverage@5%": 0.017221135029354205,
|
||
|
|
"calibration/ece": 0.11924453470120652,
|
||
|
|
"calibration/mean_confidence": 0.5154573468848538,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0013671875,
|
||
|
|
"completions/max_length": 614.0,
|
||
|
|
"completions/max_terminated_length": 614.0,
|
||
|
|
"completions/mean_length": 190.61611328125,
|
||
|
|
"completions/mean_terminated_length": 190.8771759033203,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 91.8,
|
||
|
|
"epoch": 0.432,
|
||
|
|
"grad_norm": 0.0009714950574561954,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0009,
|
||
|
|
"num_tokens": 452701340.0,
|
||
|
|
"reward": 0.9574862957000733,
|
||
|
|
"reward_std": 0.08374630659818649,
|
||
|
|
"rewards/accuracy_reward": 0.5421875,
|
||
|
|
"rewards/brier_reward": 0.7939852356910706,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9491989016532898,
|
||
|
|
"rewards/format_reward": 0.9986328125,
|
||
|
|
"rewards/frontier_coverage_0": 0.13479797691106796,
|
||
|
|
"rewards/frontier_coverage_1": 0.13479797691106796,
|
||
|
|
"rewards/frontier_coverage_10": 0.13445091098546982,
|
||
|
|
"rewards/frontier_coverage_15": 0.13207549303770066,
|
||
|
|
"rewards/frontier_coverage_20": 0.1242610327899456,
|
||
|
|
"rewards/frontier_coverage_25": 0.09696736782789231,
|
||
|
|
"rewards/frontier_coverage_5": 0.13479797691106796,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11279296875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.14243824034929276,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056396484375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.056396484375,
|
||
|
|
"signal/advantage_abs_mean": 0.06408916339278221,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06408916339278221,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1099954828619957,
|
||
|
|
"signal/advantage_std": 0.1099954828619957,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.14954062998294831,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1921384632587433,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014954063296318054,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014954063296318054,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02739506885409355,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03796095065772533,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00273950700648129,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00273950700648129,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00264892578125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.007733980100601912,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.95625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20519976019859315,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26305282711982725,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029343565460294486,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029343565460294486,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20519976019859315,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26305282711982725,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029343565460294486,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029343565460294486,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2047807455062866,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2625007629394531,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002928364695981145,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002928364695981145,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19843302965164183,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25481436848640443,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002837592316791415,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002837592316791415,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17869805097579955,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2304030865430832,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025553821586072447,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025553821586072447,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13113367408514023,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16951973736286163,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018752114614471794,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018752114614471794,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20519976019859315,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26305282711982725,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029343565460294486,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029343565460294486,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2959369188581053,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9544817324016235,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9683931262060484,
|
||
|
|
"calibration/confidence_entropy": 0.4561862154425681,
|
||
|
|
"calibration/coverage@0%": 0.008605216487279844,
|
||
|
|
"calibration/coverage@1%": 0.008605216487279844,
|
||
|
|
"calibration/coverage@10%": 0.09666401663405087,
|
||
|
|
"calibration/coverage@15%": 0.14555864726027395,
|
||
|
|
"calibration/coverage@20%": 0.22690878791585128,
|
||
|
|
"calibration/coverage@25%": 0.35163129892367906,
|
||
|
|
"calibration/coverage@30%": 0.46030837206457925,
|
||
|
|
"calibration/coverage@5%": 0.06418251590019569,
|
||
|
|
"calibration/ece": 0.13582363568984318,
|
||
|
|
"calibration/mean_confidence": 0.561269420046022,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 506.6,
|
||
|
|
"completions/max_terminated_length": 506.6,
|
||
|
|
"completions/mean_length": 195.76884765625,
|
||
|
|
"completions/mean_terminated_length": 195.94118957519532,
|
||
|
|
"completions/min_length": 16.4,
|
||
|
|
"completions/min_terminated_length": 83.2,
|
||
|
|
"epoch": 0.448,
|
||
|
|
"grad_norm": 0.0007859326433390379,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0005,
|
||
|
|
"num_tokens": 469658813.0,
|
||
|
|
"reward": 0.9418084502220154,
|
||
|
|
"reward_std": 0.083807834982872,
|
||
|
|
"rewards/accuracy_reward": 0.51064453125,
|
||
|
|
"rewards/brier_reward": 0.7846155047416687,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9552451729774475,
|
||
|
|
"rewards/format_reward": 0.99912109375,
|
||
|
|
"rewards/frontier_coverage_0": 0.14308023303747178,
|
||
|
|
"rewards/frontier_coverage_1": 0.14308023303747178,
|
||
|
|
"rewards/frontier_coverage_10": 0.14308023303747178,
|
||
|
|
"rewards/frontier_coverage_15": 0.1384931057691574,
|
||
|
|
"rewards/frontier_coverage_20": 0.11155976802110672,
|
||
|
|
"rewards/frontier_coverage_25": 0.0824906125664711,
|
||
|
|
"rewards/frontier_coverage_5": 0.14308023303747178,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.107501220703125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.14100735783576965,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.59375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0537506103515625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0537506103515625,
|
||
|
|
"signal/advantage_abs_mean": 0.06401537805795669,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06401537805795669,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10888355821371079,
|
||
|
|
"signal/advantage_std": 0.10888355821371079,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.15687717795372008,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.19920064210891725,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015687718242406844,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015687718242406844,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02206093668937683,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02995873913168907,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022060936549678444,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022060936549678444,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001690673828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.004635536065325141,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20669485926628112,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26254424154758454,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002955736452713609,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002955736452713609,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20669485926628112,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26254424154758454,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002955736452713609,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002955736452713609,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20669485926628112,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26254424154758454,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002955736452713609,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002955736452713609,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19869714379310607,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2524612307548523,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028413692489266396,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028413692489266396,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15222130417823793,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19457741677761078,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021767647005617617,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021767647005617617,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10508765280246735,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13414179980754853,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001502753491513431,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001502753491513431,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20669485926628112,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26254424154758454,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002955736452713609,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002955736452713609,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.41172691860962296,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9676244227784199,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9721633945621548,
|
||
|
|
"calibration/confidence_entropy": 0.46077859438207697,
|
||
|
|
"calibration/coverage@0%": 0.0023452788649706456,
|
||
|
|
"calibration/coverage@1%": 0.0023452788649706456,
|
||
|
|
"calibration/coverage@10%": 0.0023452788649706456,
|
||
|
|
"calibration/coverage@15%": 0.005085004892367906,
|
||
|
|
"calibration/coverage@20%": 0.11152687744618395,
|
||
|
|
"calibration/coverage@25%": 0.15692652274951074,
|
||
|
|
"calibration/coverage@30%": 0.23550941780821918,
|
||
|
|
"calibration/coverage@5%": 0.0023452788649706456,
|
||
|
|
"calibration/ece": 0.14891257793805987,
|
||
|
|
"calibration/mean_confidence": 0.4762559844498826,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00146484375,
|
||
|
|
"completions/max_length": 770.0,
|
||
|
|
"completions/max_terminated_length": 770.0,
|
||
|
|
"completions/mean_length": 199.50302734375,
|
||
|
|
"completions/mean_terminated_length": 199.79557189941406,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 91.0,
|
||
|
|
"epoch": 0.464,
|
||
|
|
"grad_norm": 0.0010671120835468173,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0014,
|
||
|
|
"num_tokens": 486872540.0,
|
||
|
|
"reward": 0.9163182258605957,
|
||
|
|
"reward_std": 0.08222974836826324,
|
||
|
|
"rewards/accuracy_reward": 0.4634765625,
|
||
|
|
"rewards/brier_reward": 0.7600107669830323,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9506341218948364,
|
||
|
|
"rewards/format_reward": 0.9984375,
|
||
|
|
"rewards/frontier_coverage_0": 0.15062947571277618,
|
||
|
|
"rewards/frontier_coverage_1": 0.15062947571277618,
|
||
|
|
"rewards/frontier_coverage_10": 0.15062947571277618,
|
||
|
|
"rewards/frontier_coverage_15": 0.14993023201823236,
|
||
|
|
"rewards/frontier_coverage_20": 0.1425690233707428,
|
||
|
|
"rewards/frontier_coverage_25": 0.1047527477145195,
|
||
|
|
"rewards/frontier_coverage_5": 0.15062947571277618,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08927001953125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.12481658458709717,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044635009765625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044635009765625,
|
||
|
|
"signal/advantage_abs_mean": 0.05983325392007828,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05983325392007828,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10508770495653152,
|
||
|
|
"signal/advantage_std": 0.10508770495653152,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.15510054528713227,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.19951906502246858,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015510055236518383,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015510055236518383,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025563039630651475,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.035719023644924165,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025563039351254703,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025563039351254703,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002978515625,
|
||
|
|
"signal/format_reward/group_std_mean": 0.007493600901216269,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.9625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014892578125,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0014892578125,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19262197911739348,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24983831644058227,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002754494268447161,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002754494268447161,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19262197911739348,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24983831644058227,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002754494268447161,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002754494268447161,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19262197911739348,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24983831644058227,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002754494268447161,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002754494268447161,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18970014452934264,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24627826511859893,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002712712064385414,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002712712064385414,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17047151327133178,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22192566394805907,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024377426598221064,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024377426598221064,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11942090839147568,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1555239737033844,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017077189404517412,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017077189404517412,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19262197911739348,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24983831644058227,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002754494268447161,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002754494268447161,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2922078350827946,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9414541980973572,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9761675690904742,
|
||
|
|
"calibration/confidence_entropy": 0.43942815951524417,
|
||
|
|
"calibration/coverage@0%": 0.00664597602739726,
|
||
|
|
"calibration/coverage@1%": 0.00664597602739726,
|
||
|
|
"calibration/coverage@10%": 0.03599483243639921,
|
||
|
|
"calibration/coverage@15%": 0.21574578033268105,
|
||
|
|
"calibration/coverage@20%": 0.29977831457925636,
|
||
|
|
"calibration/coverage@25%": 0.4435520119863014,
|
||
|
|
"calibration/coverage@30%": 0.5006207191780823,
|
||
|
|
"calibration/coverage@5%": 0.01603932240704501,
|
||
|
|
"calibration/ece": 0.16072039812978933,
|
||
|
|
"calibration/mean_confidence": 0.4906872279118442,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00146484375,
|
||
|
|
"completions/max_length": 701.2,
|
||
|
|
"completions/max_terminated_length": 701.2,
|
||
|
|
"completions/mean_length": 198.86162109375,
|
||
|
|
"completions/mean_terminated_length": 199.15369873046876,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 93.4,
|
||
|
|
"epoch": 0.48,
|
||
|
|
"grad_norm": 0.0008275453001260757,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.001,
|
||
|
|
"num_tokens": 503956915.0,
|
||
|
|
"reward": 0.945784592628479,
|
||
|
|
"reward_std": 0.08588873744010925,
|
||
|
|
"rewards/accuracy_reward": 0.5220703125,
|
||
|
|
"rewards/brier_reward": 0.7770702838897705,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9446855783462524,
|
||
|
|
"rewards/format_reward": 0.99853515625,
|
||
|
|
"rewards/frontier_coverage_0": 0.1349175065755844,
|
||
|
|
"rewards/frontier_coverage_1": 0.1349175065755844,
|
||
|
|
"rewards/frontier_coverage_10": 0.1349175065755844,
|
||
|
|
"rewards/frontier_coverage_15": 0.1349175065755844,
|
||
|
|
"rewards/frontier_coverage_20": 0.13396640568971635,
|
||
|
|
"rewards/frontier_coverage_25": 0.12195459455251693,
|
||
|
|
"rewards/frontier_coverage_5": 0.1349175065755844,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1176513671875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1545749545097351,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05882568359375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05882568359375,
|
||
|
|
"signal/advantage_abs_mean": 0.0640151172876358,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0640151172876358,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10988998115062713,
|
||
|
|
"signal/advantage_std": 0.10988998115062713,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.15043422877788543,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.19341982007026673,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01504342332482338,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01504342332482338,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030228468775749206,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.040879550576210025,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003022846952080727,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003022846952080727,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002825927734375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.007950099045410752,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.95625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014129638671875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0014129638671875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20872920453548433,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26947686076164246,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20872920453548433,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26947686076164246,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20872920453548433,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26947686076164246,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20872920453548433,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26947686076164246,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20555395781993865,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2654797852039337,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029394214041531088,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029394214041531088,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18047940731048584,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23378402590751649,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025808554608374836,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025808554608374836,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20872920453548433,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26947686076164246,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029848274774849416,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48,
|
||
|
|
"eval_calibration/aurc": 0.5075121591479552,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.8902093701786203,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.9694623596330962,
|
||
|
|
"eval_calibration/confidence_entropy": 0.4281672843683656,
|
||
|
|
"eval_calibration/coverage@0%": 0.046875,
|
||
|
|
"eval_calibration/coverage@1%": 0.046875,
|
||
|
|
"eval_calibration/coverage@10%": 0.046875,
|
||
|
|
"eval_calibration/coverage@15%": 0.09375,
|
||
|
|
"eval_calibration/coverage@20%": 0.1171875,
|
||
|
|
"eval_calibration/coverage@25%": 0.140625,
|
||
|
|
"eval_calibration/coverage@30%": 0.1484375,
|
||
|
|
"eval_calibration/coverage@5%": 0.046875,
|
||
|
|
"eval_calibration/ece": 0.2268416569545887,
|
||
|
|
"eval_calibration/mean_confidence": 0.42231040695458877,
|
||
|
|
"eval_completions/clipped_ratio": 0.002155172413793094,
|
||
|
|
"eval_completions/max_length": 458.5,
|
||
|
|
"eval_completions/max_terminated_length": 458.5,
|
||
|
|
"eval_completions/mean_length": 201.06047821044922,
|
||
|
|
"eval_completions/mean_terminated_length": 201.51300811767578,
|
||
|
|
"eval_completions/min_length": 89.25,
|
||
|
|
"eval_completions/min_terminated_length": 120.75,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 503956915.0,
|
||
|
|
"eval_reward": 0.9004999846220016,
|
||
|
|
"eval_reward_std": 0.21980078145861626,
|
||
|
|
"eval_rewards/accuracy_reward": 0.427734375,
|
||
|
|
"eval_rewards/brier_reward": 0.7848155051469803,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8925072550773621,
|
||
|
|
"eval_rewards/format_reward": 0.998046875,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.20046903938055038,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.20046903938055038,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.20046903938055038,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.20046903938055038,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.19921957328915596,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.18844087794423103,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.20046903938055038,
|
||
|
|
"eval_runtime": 28.9081,
|
||
|
|
"eval_samples_per_second": 17.296,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4732666015625,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49377230554819107,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23663330078125,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23663330078125,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.20204394310712814,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20204394310712814,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.2179497443139553,
|
||
|
|
"eval_signal/advantage_std": 0.2179497443139553,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20935826003551483,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.2633591406047344,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020935827400535345,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020935827400535345,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05121587961912155,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0662338575348258,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005121587892062962,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005121587892062962,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.39673639088869095,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.48953280597925186,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.39673639088869095,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.48953280597925186,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.39673639088869095,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.48953280597925186,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.39673639088869095,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.48953280597925186,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.39225903898477554,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4843933880329132,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005609303945675492,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005609303945675492,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.36136313527822495,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.44835418462753296,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005167493014596403,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005167493014596403,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.39673639088869095,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.48953280597925186,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005673330393619835,
|
||
|
|
"eval_steps_per_second": 0.138,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.38781416027199905,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9480112592873683,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9613049183482241,
|
||
|
|
"calibration/confidence_entropy": 0.43738759589147735,
|
||
|
|
"calibration/coverage@0%": 0.01602709148727984,
|
||
|
|
"calibration/coverage@1%": 0.01602709148727984,
|
||
|
|
"calibration/coverage@10%": 0.11602862035225048,
|
||
|
|
"calibration/coverage@15%": 0.13870015900195692,
|
||
|
|
"calibration/coverage@20%": 0.1789895911275469,
|
||
|
|
"calibration/coverage@25%": 0.22672386720099,
|
||
|
|
"calibration/coverage@30%": 0.2748502971394037,
|
||
|
|
"calibration/coverage@5%": 0.04571459148727984,
|
||
|
|
"calibration/ece": 0.15164128651966327,
|
||
|
|
"calibration/mean_confidence": 0.49700723151505083,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00126953125,
|
||
|
|
"completions/max_length": 620.6,
|
||
|
|
"completions/max_terminated_length": 620.6,
|
||
|
|
"completions/mean_length": 203.96142578125,
|
||
|
|
"completions/mean_terminated_length": 204.2204620361328,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 98.8,
|
||
|
|
"epoch": 0.496,
|
||
|
|
"grad_norm": 0.000855308840982616,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0007,
|
||
|
|
"num_tokens": 521353320.0,
|
||
|
|
"reward": 0.9558730244636535,
|
||
|
|
"reward_std": 0.08216302543878555,
|
||
|
|
"rewards/accuracy_reward": 0.5431640625,
|
||
|
|
"rewards/brier_reward": 0.7816872239112854,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9526494383811951,
|
||
|
|
"rewards/format_reward": 0.99873046875,
|
||
|
|
"rewards/frontier_coverage_0": 0.11778900772333145,
|
||
|
|
"rewards/frontier_coverage_1": 0.11778900772333145,
|
||
|
|
"rewards/frontier_coverage_10": 0.11775302439928055,
|
||
|
|
"rewards/frontier_coverage_15": 0.11705803275108337,
|
||
|
|
"rewards/frontier_coverage_20": 0.11374893933534622,
|
||
|
|
"rewards/frontier_coverage_25": 0.10171589106321335,
|
||
|
|
"rewards/frontier_coverage_5": 0.11778900772333145,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09918212890625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.13363586366176605,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049591064453125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049591064453125,
|
||
|
|
"signal/advantage_abs_mean": 0.06129216924309731,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06129216924309731,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1069572851061821,
|
||
|
|
"signal/advantage_std": 0.1069572851061821,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1463674783706665,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.19021452069282532,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014636747911572457,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014636747911572457,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02377520613372326,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.032842491567134854,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023775207344442608,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023775207344442608,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002435302734375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.006508936267346143,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012176513671875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012176513671875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18798189163208007,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24676340222358703,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002688140980899334,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002688140980899334,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18798189163208007,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24676340222358703,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002688140980899334,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002688140980899334,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18778745234012603,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24652613401412965,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026853605639189483,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026853605639189483,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18584738671779633,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2440545976161957,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026576175820082425,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026576175820082425,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17214059233665466,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22665069103240967,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024616105481982233,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024616105481982233,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13599956333637236,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17969924956560135,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019447937374934554,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019447937374934554,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18798189163208007,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24676340222358703,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002688140980899334,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002688140980899334,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3185750943214118,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9608030156492443,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9600624228150858,
|
||
|
|
"calibration/confidence_entropy": 0.4684197472341688,
|
||
|
|
"calibration/coverage@0%": 0.026301559694577835,
|
||
|
|
"calibration/coverage@1%": 0.026301559694577835,
|
||
|
|
"calibration/coverage@10%": 0.17225483884261378,
|
||
|
|
"calibration/coverage@15%": 0.26192426369382427,
|
||
|
|
"calibration/coverage@20%": 0.331605793010546,
|
||
|
|
"calibration/coverage@25%": 0.43373122169347056,
|
||
|
|
"calibration/coverage@30%": 0.4939625122309197,
|
||
|
|
"calibration/coverage@5%": 0.04634085242542263,
|
||
|
|
"calibration/ece": 0.14288779158153292,
|
||
|
|
"calibration/mean_confidence": 0.5118868218103911,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 656.0,
|
||
|
|
"completions/max_terminated_length": 656.0,
|
||
|
|
"completions/mean_length": 202.19775390625,
|
||
|
|
"completions/mean_terminated_length": 202.37572631835937,
|
||
|
|
"completions/min_length": 38.6,
|
||
|
|
"completions/min_terminated_length": 94.0,
|
||
|
|
"epoch": 0.512,
|
||
|
|
"grad_norm": 0.0008858853834681213,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0002,
|
||
|
|
"num_tokens": 538569489.0,
|
||
|
|
"reward": 0.9573903560638428,
|
||
|
|
"reward_std": 0.08361846506595612,
|
||
|
|
"rewards/accuracy_reward": 0.5423828125,
|
||
|
|
"rewards/brier_reward": 0.7953786492347718,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9543616294860839,
|
||
|
|
"rewards/format_reward": 0.99912109375,
|
||
|
|
"rewards/frontier_coverage_0": 0.12758512943983077,
|
||
|
|
"rewards/frontier_coverage_1": 0.12758512943983077,
|
||
|
|
"rewards/frontier_coverage_10": 0.12758906930685043,
|
||
|
|
"rewards/frontier_coverage_15": 0.12283221930265427,
|
||
|
|
"rewards/frontier_coverage_20": 0.1063217431306839,
|
||
|
|
"rewards/frontier_coverage_25": 0.07619328275322915,
|
||
|
|
"rewards/frontier_coverage_5": 0.12758512943983077,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1026611328125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.14087689369916917,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05133056640625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05133056640625,
|
||
|
|
"signal/advantage_abs_mean": 0.061477158963680265,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.061477158963680265,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11054081916809082,
|
||
|
|
"signal/advantage_std": 0.11054081916809082,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13833895027637483,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.18004256784915923,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01383389551192522,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01383389551192522,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021861471980810166,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02985510379076004,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002186147286556661,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002186147286556661,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.004971844470128417,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17110781669616698,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2254611998796463,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002446841774508357,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002446841774508357,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17110781669616698,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2254611998796463,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002446841774508357,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002446841774508357,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17093735337257385,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22522563338279725,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024444041773676872,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024444041773676872,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16177424490451814,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21304078996181489,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023133717477321625,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023133717477321625,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12729571759700775,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16839581429958345,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018203288316726684,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018203288316726684,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0875883400440216,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11519992053508758,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012525132391601802,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012525132391601802,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17110781669616698,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2254611998796463,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002446841774508357,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002446841774508357,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2115114488085529,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9620413869289213,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9703211218417002,
|
||
|
|
"calibration/confidence_entropy": 0.441188989082889,
|
||
|
|
"calibration/coverage@0%": 0.03561414505870841,
|
||
|
|
"calibration/coverage@1%": 0.03561414505870841,
|
||
|
|
"calibration/coverage@10%": 0.23783252813111547,
|
||
|
|
"calibration/coverage@15%": 0.46140380381604695,
|
||
|
|
"calibration/coverage@20%": 0.5477915545499021,
|
||
|
|
"calibration/coverage@25%": 0.6400348581213307,
|
||
|
|
"calibration/coverage@30%": 0.7490780944227006,
|
||
|
|
"calibration/coverage@5%": 0.12045162671232876,
|
||
|
|
"calibration/ece": 0.12680648981791925,
|
||
|
|
"calibration/mean_confidence": 0.5183541512637606,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 629.0,
|
||
|
|
"completions/max_terminated_length": 629.0,
|
||
|
|
"completions/mean_length": 202.233984375,
|
||
|
|
"completions/mean_terminated_length": 202.43136291503907,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 95.8,
|
||
|
|
"epoch": 0.528,
|
||
|
|
"grad_norm": 0.0007963773678056896,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0006,
|
||
|
|
"num_tokens": 555669901.0,
|
||
|
|
"reward": 0.956901216506958,
|
||
|
|
"reward_std": 0.08233840018510818,
|
||
|
|
"rewards/accuracy_reward": 0.5412109375,
|
||
|
|
"rewards/brier_reward": 0.7963914513587952,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9506345868110657,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.13646617978811265,
|
||
|
|
"rewards/frontier_coverage_1": 0.13646617978811265,
|
||
|
|
"rewards/frontier_coverage_10": 0.12993793487548827,
|
||
|
|
"rewards/frontier_coverage_15": 0.11923633962869644,
|
||
|
|
"rewards/frontier_coverage_20": 0.10301651507616043,
|
||
|
|
"rewards/frontier_coverage_25": 0.08392253071069718,
|
||
|
|
"rewards/frontier_coverage_5": 0.13580917268991471,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1123046875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.14442603141069413,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05615234375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05615234375,
|
||
|
|
"signal/advantage_abs_mean": 0.06305904388427734,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06305904388427734,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11096402853727341,
|
||
|
|
"signal/advantage_std": 0.11096402853727341,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.14149291813373566,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1800965338945389,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014149292558431625,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014149292558431625,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025094441324472427,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03405176103115082,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025094441138207913,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025094441138207913,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0018798828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005187963275238872,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1912826269865036,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24701529741287231,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002735341480001807,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002735341480001807,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1912826269865036,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24701529741287231,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002735341480001807,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002735341480001807,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18224962651729584,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2354464501142502,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026061696466058494,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026061696466058494,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16217685639858245,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20974383354187012,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023191290441900493,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023191290441900493,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1335637390613556,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17252120077610017,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019099614582955837,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019099614582955837,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0971255749464035,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12469170093536378,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001388895697891712,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001388895697891712,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19096426665782928,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24660421013832093,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002730788942426443,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002730788942426443,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2552697387528125,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9281181802421189,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9703870048611121,
|
||
|
|
"calibration/confidence_entropy": 0.42132426701062115,
|
||
|
|
"calibration/coverage@0%": 0.004296875,
|
||
|
|
"calibration/coverage@1%": 0.004296875,
|
||
|
|
"calibration/coverage@10%": 0.0781387597847358,
|
||
|
|
"calibration/coverage@15%": 0.18566918419765166,
|
||
|
|
"calibration/coverage@20%": 0.390991927592955,
|
||
|
|
"calibration/coverage@25%": 0.5626903436888454,
|
||
|
|
"calibration/coverage@30%": 0.6912610078277887,
|
||
|
|
"calibration/coverage@5%": 0.02578125,
|
||
|
|
"calibration/ece": 0.09877190065298394,
|
||
|
|
"calibration/mean_confidence": 0.5423266186064797,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00078125,
|
||
|
|
"completions/max_length": 783.0,
|
||
|
|
"completions/max_terminated_length": 783.0,
|
||
|
|
"completions/mean_length": 205.52802734375,
|
||
|
|
"completions/mean_terminated_length": 205.68857421875,
|
||
|
|
"completions/min_length": 38.2,
|
||
|
|
"completions/min_terminated_length": 92.8,
|
||
|
|
"epoch": 0.544,
|
||
|
|
"grad_norm": 0.0009505171910859644,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0005,
|
||
|
|
"num_tokens": 572938092.0,
|
||
|
|
"reward": 0.9629211902618409,
|
||
|
|
"reward_std": 0.08926361948251724,
|
||
|
|
"rewards/accuracy_reward": 0.56416015625,
|
||
|
|
"rewards/brier_reward": 0.7771433115005493,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9545076489448547,
|
||
|
|
"rewards/format_reward": 0.99912109375,
|
||
|
|
"rewards/frontier_coverage_0": 0.0925481453537941,
|
||
|
|
"rewards/frontier_coverage_1": 0.0925481453537941,
|
||
|
|
"rewards/frontier_coverage_10": 0.08745588660240174,
|
||
|
|
"rewards/frontier_coverage_15": 0.07613595128059387,
|
||
|
|
"rewards/frontier_coverage_20": 0.06427086591720581,
|
||
|
|
"rewards/frontier_coverage_25": 0.06200792193412781,
|
||
|
|
"rewards/frontier_coverage_5": 0.0925481453537941,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.123321533203125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.16205840706825256,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0616607666015625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0616607666015625,
|
||
|
|
"signal/advantage_abs_mean": 0.06674028560519218,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.06674028560519218,
|
||
|
|
"signal/advantage_pre_scale_std": 0.11618766188621521,
|
||
|
|
"signal/advantage_std": 0.11618766188621521,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1495936095714569,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.19124016761779786,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014959361404180527,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014959361404180527,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022427035868167876,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030667876452207567,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022427036659792066,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022427036659792066,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001690673828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.004635535925626755,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19233616888523103,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2488338440656662,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002750407112762332,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002750407112762332,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19233616888523103,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2488338440656662,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002750407112762332,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002750407112762332,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18275881111621856,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23677020370960236,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002613450959324837,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002613450959324837,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15216105580329894,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1980179637670517,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002175903180614114,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002175903180614114,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12181595414876938,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15854274928569795,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017419681418687106,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017419681418687106,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09723487794399262,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12516086548566818,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001390458783134818,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001390458783134818,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19233616888523103,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2488338440656662,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002750407112762332,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002750407112762332,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2696651710957544,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9535177713606812,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9551885689032709,
|
||
|
|
"calibration/confidence_entropy": 0.44521922125916424,
|
||
|
|
"calibration/coverage@0%": 0.023067514677103718,
|
||
|
|
"calibration/coverage@1%": 0.09767688967710372,
|
||
|
|
"calibration/coverage@10%": 0.21059962084148726,
|
||
|
|
"calibration/coverage@15%": 0.3393285225048924,
|
||
|
|
"calibration/coverage@20%": 0.37964163405088064,
|
||
|
|
"calibration/coverage@25%": 0.4587022994129158,
|
||
|
|
"calibration/coverage@30%": 0.5232815557729942,
|
||
|
|
"calibration/coverage@5%": 0.1785423801369863,
|
||
|
|
"calibration/ece": 0.13447665490400346,
|
||
|
|
"calibration/mean_confidence": 0.5198654114470358,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 656.6,
|
||
|
|
"completions/max_terminated_length": 656.6,
|
||
|
|
"completions/mean_length": 207.4677734375,
|
||
|
|
"completions/mean_terminated_length": 207.65007629394532,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 98.2,
|
||
|
|
"epoch": 0.56,
|
||
|
|
"grad_norm": 0.0007309909560717642,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0001,
|
||
|
|
"num_tokens": 589883970.0,
|
||
|
|
"reward": 0.9505073428153992,
|
||
|
|
"reward_std": 0.07694827765226364,
|
||
|
|
"rewards/accuracy_reward": 0.52626953125,
|
||
|
|
"rewards/brier_reward": 0.7975297689437866,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9535765409469604,
|
||
|
|
"rewards/format_reward": 0.99912109375,
|
||
|
|
"rewards/frontier_coverage_0": 0.142837318778038,
|
||
|
|
"rewards/frontier_coverage_1": 0.142837318778038,
|
||
|
|
"rewards/frontier_coverage_10": 0.14169300347566605,
|
||
|
|
"rewards/frontier_coverage_15": 0.125778466463089,
|
||
|
|
"rewards/frontier_coverage_20": 0.10275547206401825,
|
||
|
|
"rewards/frontier_coverage_25": 0.0894699290394783,
|
||
|
|
"rewards/frontier_coverage_5": 0.142837318778038,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.089459228515625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.12330863773822784,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0447296142578125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0447296142578125,
|
||
|
|
"signal/advantage_abs_mean": 0.05690548121929169,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05690548121929169,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10379899442195892,
|
||
|
|
"signal/advantage_std": 0.10379899442195892,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13861277103424072,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1788020610809326,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01386127769947052,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01386127769947052,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02220112681388855,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030533115193247796,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002220112690702081,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002220112690702081,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.004971844470128417,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1741619348526001,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22573706805706023,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024905156344175337,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024905156344175337,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1741619348526001,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22573706805706023,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024905156344175337,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024905156344175337,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16985383331775666,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22016933262348176,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002428909717127681,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002428909717127681,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14225522726774215,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18450681865215302,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002034249692223966,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002034249692223966,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10933885574340821,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14097019135951996,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015635456424206496,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015635456424206496,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0914567232131958,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11628818064928055,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013078311458230019,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013078311458230019,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1741619348526001,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22573706805706023,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024905156344175337,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024905156344175337,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3084092567902788,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9473711564829423,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9565885810199408,
|
||
|
|
"calibration/confidence_entropy": 0.44316332677270776,
|
||
|
|
"calibration/coverage@0%": 0.007427990459882583,
|
||
|
|
"calibration/coverage@1%": 0.007427990459882583,
|
||
|
|
"calibration/coverage@10%": 0.0762116254892368,
|
||
|
|
"calibration/coverage@15%": 0.21067453522504892,
|
||
|
|
"calibration/coverage@20%": 0.2955357142857143,
|
||
|
|
"calibration/coverage@25%": 0.3858060176125245,
|
||
|
|
"calibration/coverage@30%": 0.4729436766144814,
|
||
|
|
"calibration/coverage@5%": 0.007427990459882583,
|
||
|
|
"calibration/ece": 0.0980685209106232,
|
||
|
|
"calibration/mean_confidence": 0.5117813336144459,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.001171875,
|
||
|
|
"completions/max_length": 724.4,
|
||
|
|
"completions/max_terminated_length": 724.4,
|
||
|
|
"completions/mean_length": 212.187890625,
|
||
|
|
"completions/mean_terminated_length": 212.44044494628906,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 104.6,
|
||
|
|
"epoch": 0.576,
|
||
|
|
"grad_norm": 0.000960467616096139,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0007,
|
||
|
|
"num_tokens": 607243398.0,
|
||
|
|
"reward": 0.9429786443710327,
|
||
|
|
"reward_std": 0.0745589941740036,
|
||
|
|
"rewards/accuracy_reward": 0.516015625,
|
||
|
|
"rewards/brier_reward": 0.7830522418022156,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9498249053955078,
|
||
|
|
"rewards/format_reward": 0.998828125,
|
||
|
|
"rewards/frontier_coverage_0": 0.13849677741527558,
|
||
|
|
"rewards/frontier_coverage_1": 0.13849677741527558,
|
||
|
|
"rewards/frontier_coverage_10": 0.13743431270122528,
|
||
|
|
"rewards/frontier_coverage_15": 0.12267089337110519,
|
||
|
|
"rewards/frontier_coverage_20": 0.10137771666049958,
|
||
|
|
"rewards/frontier_coverage_25": 0.08106651529669762,
|
||
|
|
"rewards/frontier_coverage_5": 0.13843247890472413,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.087158203125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.12412037551403046,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0435791015625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0435791015625,
|
||
|
|
"signal/advantage_abs_mean": 0.053151430934667586,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.053151430934667586,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10061174780130386,
|
||
|
|
"signal/advantage_std": 0.10061174780130386,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13370026350021363,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17423023283481598,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013370026648044587,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013370026648044587,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024790653213858604,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03437778577208519,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002479065442457795,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002479065442457795,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00225830078125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0062928176019340755,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001129150390625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001129150390625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1691173493862152,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22055851221084594,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024183780420571565,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024183780420571565,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1691173493862152,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22055851221084594,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024183780420571565,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024183780420571565,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16804122924804688,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21918695271015168,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002402989659458399,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002402989659458399,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1508244901895523,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19721821546554566,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002156790136359632,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002156790136359632,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12613796889781953,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.165260449051857,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018037728732451797,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018037728732451797,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09953366965055466,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12996020615100862,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014233314665034412,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014233314665034412,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16890983879566193,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22030186653137207,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002415410662069917,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002415410662069917,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2922215928302969,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9410299320000319,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9611906168793969,
|
||
|
|
"calibration/confidence_entropy": 0.42611274466780535,
|
||
|
|
"calibration/coverage@0%": 0.005496432086614173,
|
||
|
|
"calibration/coverage@1%": 0.005496432086614173,
|
||
|
|
"calibration/coverage@10%": 0.22282136550486334,
|
||
|
|
"calibration/coverage@15%": 0.3220304297900262,
|
||
|
|
"calibration/coverage@20%": 0.4666775385796105,
|
||
|
|
"calibration/coverage@25%": 0.562644392338561,
|
||
|
|
"calibration/coverage@30%": 0.6155552124008647,
|
||
|
|
"calibration/coverage@5%": 0.08693791492975142,
|
||
|
|
"calibration/ece": 0.1429741727432932,
|
||
|
|
"calibration/mean_confidence": 0.4983201035996781,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00107421875,
|
||
|
|
"completions/max_length": 702.0,
|
||
|
|
"completions/max_terminated_length": 702.0,
|
||
|
|
"completions/mean_length": 211.6880859375,
|
||
|
|
"completions/mean_terminated_length": 211.91741027832032,
|
||
|
|
"completions/min_length": 19.4,
|
||
|
|
"completions/min_terminated_length": 95.0,
|
||
|
|
"epoch": 0.592,
|
||
|
|
"grad_norm": 0.0008625586051493883,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0007,
|
||
|
|
"num_tokens": 624578796.0,
|
||
|
|
"reward": 0.9486447334289551,
|
||
|
|
"reward_std": 0.07493900805711746,
|
||
|
|
"rewards/accuracy_reward": 0.52861328125,
|
||
|
|
"rewards/brier_reward": 0.7828563928604126,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9443202376365661,
|
||
|
|
"rewards/format_reward": 0.998828125,
|
||
|
|
"rewards/frontier_coverage_0": 0.13428538143634797,
|
||
|
|
"rewards/frontier_coverage_1": 0.13428538143634797,
|
||
|
|
"rewards/frontier_coverage_10": 0.13371351063251496,
|
||
|
|
"rewards/frontier_coverage_15": 0.1263003244996071,
|
||
|
|
"rewards/frontier_coverage_20": 0.10833458304405212,
|
||
|
|
"rewards/frontier_coverage_25": 0.08254657685756683,
|
||
|
|
"rewards/frontier_coverage_5": 0.13412626087665558,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.094085693359375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1289389744400978,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0470428466796875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0470428466796875,
|
||
|
|
"signal/advantage_abs_mean": 0.05466923713684082,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05466923713684082,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10098292231559754,
|
||
|
|
"signal/advantage_std": 0.10098292231559754,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13195911645889283,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17153678834438324,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013195911981165409,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013195911981165409,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029695136472582817,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03927401304244995,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029695137403905393,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029695137403905393,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0022216796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005560987815260887,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00111083984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00111083984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17768071293830873,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23065827786922455,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00254083420149982,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00254083420149982,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17768071293830873,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23065827786922455,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00254083420149982,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00254083420149982,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1767966330051422,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22955543994903566,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025281918235123156,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025281918235123156,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1646002173423767,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2140391707420349,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002353783091530204,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002353783091530204,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.143599534034729,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18697430789470673,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020534733310341837,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020534733310341837,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10858550369739532,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1407358765602112,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015527727315202355,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015527727315202355,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17743545770645142,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23035745918750763,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025373270735144617,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025373270735144617,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2221950929064717,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8981914098495842,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9540588288714265,
|
||
|
|
"calibration/confidence_entropy": 0.3871919967367749,
|
||
|
|
"calibration/coverage@0%": 0.03520986519607843,
|
||
|
|
"calibration/coverage@1%": 0.03520986519607843,
|
||
|
|
"calibration/coverage@10%": 0.26088082107843136,
|
||
|
|
"calibration/coverage@15%": 0.42122549019607847,
|
||
|
|
"calibration/coverage@20%": 0.5440119485294117,
|
||
|
|
"calibration/coverage@25%": 0.6319975490196079,
|
||
|
|
"calibration/coverage@30%": 0.7090349264705882,
|
||
|
|
"calibration/coverage@5%": 0.16232536764705885,
|
||
|
|
"calibration/ece": 0.10212346384803919,
|
||
|
|
"calibration/mean_confidence": 0.4796287089460784,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00205078125,
|
||
|
|
"completions/max_length": 616.0,
|
||
|
|
"completions/max_terminated_length": 616.0,
|
||
|
|
"completions/mean_length": 212.79658203125,
|
||
|
|
"completions/mean_terminated_length": 213.23521423339844,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 101.2,
|
||
|
|
"epoch": 0.608,
|
||
|
|
"grad_norm": 0.0006865831674076617,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0015,
|
||
|
|
"num_tokens": 641757321.0,
|
||
|
|
"reward": 0.953406548500061,
|
||
|
|
"reward_std": 0.06920359134674073,
|
||
|
|
"rewards/accuracy_reward": 0.52880859375,
|
||
|
|
"rewards/brier_reward": 0.812217366695404,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9366782307624817,
|
||
|
|
"rewards/format_reward": 0.9978515625,
|
||
|
|
"rewards/frontier_coverage_0": 0.17517081648111343,
|
||
|
|
"rewards/frontier_coverage_1": 0.17517081648111343,
|
||
|
|
"rewards/frontier_coverage_10": 0.16990652978420256,
|
||
|
|
"rewards/frontier_coverage_15": 0.14712055921554565,
|
||
|
|
"rewards/frontier_coverage_20": 0.12427032291889191,
|
||
|
|
"rewards/frontier_coverage_25": 0.09521407037973403,
|
||
|
|
"rewards/frontier_coverage_5": 0.17517081648111343,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.090557861328125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.11723618507385254,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0452789306640625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0452789306640625,
|
||
|
|
"signal/advantage_abs_mean": 0.05036781206727028,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05036781206727028,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09739291965961457,
|
||
|
|
"signal/advantage_std": 0.09739291965961457,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12750503718852996,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16529836356639863,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012750503793358802,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012750503793358802,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03464677035808563,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04760062992572785,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003464677091687918,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003464677091687918,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00411376953125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.010808163974434137,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.94375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002056884765625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.002056884765625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1837489575147629,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23425883054733276,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026276100426912306,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026276100426912306,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1837489575147629,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23425883054733276,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026276100426912306,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026276100426912306,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1784739762544632,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22762221992015838,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00255217794328928,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00255217794328928,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1537548005580902,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19631823003292084,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002198693575337529,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002198693575337529,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1277748465538025,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16300177872180938,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001827180222608149,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001827180222608149,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08930874615907669,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11316503584384918,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012771150562912227,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012771150562912227,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1837489575147629,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23425883054733276,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026276100426912306,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026276100426912306,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.25358338367873656,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9409049379893626,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9444694889337532,
|
||
|
|
"calibration/confidence_entropy": 0.4293404041236972,
|
||
|
|
"calibration/coverage@0%": 0.015234375,
|
||
|
|
"calibration/coverage@1%": 0.015234375,
|
||
|
|
"calibration/coverage@10%": 0.1504701259784736,
|
||
|
|
"calibration/coverage@15%": 0.23410591976516634,
|
||
|
|
"calibration/coverage@20%": 0.42790560787671234,
|
||
|
|
"calibration/coverage@25%": 0.525601608365949,
|
||
|
|
"calibration/coverage@30%": 0.6604444410469668,
|
||
|
|
"calibration/coverage@5%": 0.05234375,
|
||
|
|
"calibration/ece": 0.10427488049231773,
|
||
|
|
"calibration/mean_confidence": 0.5089863970315482,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 652.6,
|
||
|
|
"completions/max_terminated_length": 652.6,
|
||
|
|
"completions/mean_length": 216.096484375,
|
||
|
|
"completions/mean_terminated_length": 216.2876770019531,
|
||
|
|
"completions/min_length": 21.2,
|
||
|
|
"completions/min_terminated_length": 102.6,
|
||
|
|
"epoch": 0.624,
|
||
|
|
"grad_norm": 0.0008978003170341253,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0009,
|
||
|
|
"num_tokens": 659314053.0,
|
||
|
|
"reward": 0.9518577218055725,
|
||
|
|
"reward_std": 0.07592637240886688,
|
||
|
|
"rewards/accuracy_reward": 0.52685546875,
|
||
|
|
"rewards/brier_reward": 0.8048586487770081,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9485518336296082,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.16178978681564332,
|
||
|
|
"rewards/frontier_coverage_1": 0.16178978681564332,
|
||
|
|
"rewards/frontier_coverage_10": 0.14961466044187546,
|
||
|
|
"rewards/frontier_coverage_15": 0.12837036401033403,
|
||
|
|
"rewards/frontier_coverage_20": 0.10305129885673522,
|
||
|
|
"rewards/frontier_coverage_25": 0.08324643075466157,
|
||
|
|
"rewards/frontier_coverage_5": 0.16159312427043915,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.095526123046875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.12815573960542678,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477630615234375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0477630615234375,
|
||
|
|
"signal/advantage_abs_mean": 0.05672949850559235,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05672949850559235,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10506684482097625,
|
||
|
|
"signal/advantage_std": 0.10506684482097625,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13485520780086518,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17366183698177337,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013485521078109741,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013485521078109741,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025889959558844565,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.035064182430505755,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025889959651976824,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025889959651976824,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00186767578125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.004851654777303338,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000933837890625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17606533467769622,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22901685237884523,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025177341885864735,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025177341885864735,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17606533467769622,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22901685237884523,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025177341885864735,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025177341885864735,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15669833421707152,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20450213849544524,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022407862357795238,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022407862357795238,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1294904828071594,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16927442252635955,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001851713890209794,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001851713890209794,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09612660855054855,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1249430313706398,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013746104203164577,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013746104203164577,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07619047313928604,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09707885384559631,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010895237675867975,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010895237675867975,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1756005883216858,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2284373462200165,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025110884103924035,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025110884103924035,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.26737008830943737,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9344917692121356,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9439410149296797,
|
||
|
|
"calibration/confidence_entropy": 0.4337533162224349,
|
||
|
|
"calibration/coverage@0%": 0.00703125,
|
||
|
|
"calibration/coverage@1%": 0.00703125,
|
||
|
|
"calibration/coverage@10%": 0.26171875,
|
||
|
|
"calibration/coverage@15%": 0.3359559184675835,
|
||
|
|
"calibration/coverage@20%": 0.4015809184675835,
|
||
|
|
"calibration/coverage@25%": 0.5195865054027504,
|
||
|
|
"calibration/coverage@30%": 0.6243131446463654,
|
||
|
|
"calibration/coverage@5%": 0.160546875,
|
||
|
|
"calibration/ece": 0.16993083121851912,
|
||
|
|
"calibration/mean_confidence": 0.5635277496385536,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.001953125,
|
||
|
|
"completions/max_length": 843.4,
|
||
|
|
"completions/max_terminated_length": 843.4,
|
||
|
|
"completions/mean_length": 220.33671875,
|
||
|
|
"completions/mean_terminated_length": 220.7684814453125,
|
||
|
|
"completions/min_length": 20.4,
|
||
|
|
"completions/min_terminated_length": 100.8,
|
||
|
|
"epoch": 0.64,
|
||
|
|
"grad_norm": 0.0007992589962668717,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.001,
|
||
|
|
"num_tokens": 676912989.0,
|
||
|
|
"reward": 0.9679869651794434,
|
||
|
|
"reward_std": 0.06987917795777321,
|
||
|
|
"rewards/accuracy_reward": 0.56767578125,
|
||
|
|
"rewards/brier_reward": 0.7970318555831909,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9502007246017456,
|
||
|
|
"rewards/format_reward": 0.998046875,
|
||
|
|
"rewards/frontier_coverage_0": 0.11542568355798721,
|
||
|
|
"rewards/frontier_coverage_1": 0.11542568355798721,
|
||
|
|
"rewards/frontier_coverage_10": 0.1147423341870308,
|
||
|
|
"rewards/frontier_coverage_15": 0.09766240417957306,
|
||
|
|
"rewards/frontier_coverage_20": 0.08443292677402496,
|
||
|
|
"rewards/frontier_coverage_25": 0.08432328850030898,
|
||
|
|
"rewards/frontier_coverage_5": 0.11542568355798721,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.082891845703125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.112214395403862,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0414459228515625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0414459228515625,
|
||
|
|
"signal/advantage_abs_mean": 0.051616641134023665,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.051616641134023665,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10157921314239501,
|
||
|
|
"signal/advantage_std": 0.10157921314239501,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12237899452447891,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.15896496474742888,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012237900123000144,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012237900123000144,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02465735524892807,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03368319347500801,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00246573556214571,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00246573556214571,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0033203125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.006831592507660389,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00166015625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00166015625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.147454434633255,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19461724758148194,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021085983607918024,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021085983607918024,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.147454434633255,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19461724758148194,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021085983607918024,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021085983607918024,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13889139294624328,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18324436545372008,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019861468579620125,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019861468579620125,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11265147924423217,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1481318399310112,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016109161078929901,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016109161078929901,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08642503172159195,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11224976629018783,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012358779087662697,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012358779087662697,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07372982874512672,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09456790834665299,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001054336572997272,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001054336572997272,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.147454434633255,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19461724758148194,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021085983607918024,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021085983607918024,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.64,
|
||
|
|
"eval_calibration/aurc": 0.44617585022319983,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.8707856047342964,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.9545939481969506,
|
||
|
|
"eval_calibration/confidence_entropy": 0.42791586438434864,
|
||
|
|
"eval_calibration/coverage@0%": 0.03125,
|
||
|
|
"eval_calibration/coverage@1%": 0.03125,
|
||
|
|
"eval_calibration/coverage@10%": 0.03125,
|
||
|
|
"eval_calibration/coverage@15%": 0.03125,
|
||
|
|
"eval_calibration/coverage@20%": 0.1328125,
|
||
|
|
"eval_calibration/coverage@25%": 0.203125,
|
||
|
|
"eval_calibration/coverage@30%": 0.2109375,
|
||
|
|
"eval_calibration/coverage@5%": 0.03125,
|
||
|
|
"eval_calibration/ece": 0.2234921875,
|
||
|
|
"eval_calibration/mean_confidence": 0.5258359375,
|
||
|
|
"eval_completions/clipped_ratio": 0.001953125,
|
||
|
|
"eval_completions/max_length": 470.0,
|
||
|
|
"eval_completions/max_terminated_length": 470.0,
|
||
|
|
"eval_completions/mean_length": 221.40463256835938,
|
||
|
|
"eval_completions/mean_terminated_length": 221.83445739746094,
|
||
|
|
"eval_completions/min_length": 87.5,
|
||
|
|
"eval_completions/min_terminated_length": 118.0,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 676912989.0,
|
||
|
|
"eval_reward": 0.8984390497207642,
|
||
|
|
"eval_reward_std": 0.23546447232365608,
|
||
|
|
"eval_rewards/accuracy_reward": 0.43359375,
|
||
|
|
"eval_rewards/brier_reward": 0.77432681620121,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.9010600447654724,
|
||
|
|
"eval_rewards/format_reward": 0.998046875,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.18082591891288757,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.18082591891288757,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.1791050136089325,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.1590343937277794,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.1054372489452362,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.06905412301421165,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.18026751652359962,
|
||
|
|
"eval_runtime": 31.6018,
|
||
|
|
"eval_samples_per_second": 15.822,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.47314453125,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4938596710562706,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.236572265625,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.236572265625,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.21800414845347404,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21800414845347404,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.2330782301723957,
|
||
|
|
"eval_signal/advantage_std": 0.2330782301723957,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2277192696928978,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.27933692932128906,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022771927528083324,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022771927528083324,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.043904950842261314,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05730144586414099,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00439049513079226,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00439049513079226,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3516712710261345,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.44670548290014267,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005028899176977575,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005028899176977575,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3516712710261345,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.44670548290014267,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005028899176977575,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005028899176977575,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3491114154458046,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4436797574162483,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004992293077521026,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004992293077521026,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.302977554500103,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3866383582353592,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004332578741014004,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004332578741014004,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.20721931010484695,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.2700957730412483,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002963236125651747,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002963236125651747,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.13527094572782516,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.17407378554344177,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019343745952937752,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019343745952937752,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.35068874806165695,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4455043748021126,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005014849128201604,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005014849128201604,
|
||
|
|
"eval_steps_per_second": 0.127,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.4264361275920406,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9508852979247158,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9585796176872498,
|
||
|
|
"calibration/confidence_entropy": 0.46872125411654464,
|
||
|
|
"calibration/coverage@0%": 0.00234375,
|
||
|
|
"calibration/coverage@1%": 0.00234375,
|
||
|
|
"calibration/coverage@10%": 0.0078125,
|
||
|
|
"calibration/coverage@15%": 0.041015625,
|
||
|
|
"calibration/coverage@20%": 0.0910324425146771,
|
||
|
|
"calibration/coverage@25%": 0.13128822162426615,
|
||
|
|
"calibration/coverage@30%": 0.22585004892367905,
|
||
|
|
"calibration/coverage@5%": 0.00234375,
|
||
|
|
"calibration/ece": 0.14194846120468735,
|
||
|
|
"calibration/mean_confidence": 0.5072344795816444,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00146484375,
|
||
|
|
"completions/max_length": 965.2,
|
||
|
|
"completions/max_terminated_length": 965.2,
|
||
|
|
"completions/mean_length": 220.93134765625,
|
||
|
|
"completions/mean_terminated_length": 221.25537109375,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 104.8,
|
||
|
|
"epoch": 0.656,
|
||
|
|
"grad_norm": 0.0007650917395949364,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0012,
|
||
|
|
"num_tokens": 694031870.0,
|
||
|
|
"reward": 0.9340495944023133,
|
||
|
|
"reward_std": 0.07882022857666016,
|
||
|
|
"rewards/accuracy_reward": 0.49873046875,
|
||
|
|
"rewards/brier_reward": 0.7780602931976318,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9526399970054626,
|
||
|
|
"rewards/format_reward": 0.99853515625,
|
||
|
|
"rewards/frontier_coverage_0": 0.13952269703149794,
|
||
|
|
"rewards/frontier_coverage_1": 0.13952269703149794,
|
||
|
|
"rewards/frontier_coverage_10": 0.13961339592933655,
|
||
|
|
"rewards/frontier_coverage_15": 0.12747088223695754,
|
||
|
|
"rewards/frontier_coverage_20": 0.10037298947572708,
|
||
|
|
"rewards/frontier_coverage_25": 0.07738360986113549,
|
||
|
|
"rewards/frontier_coverage_5": 0.13952269703149794,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.096051025390625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.12981612980365753,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0480255126953125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0480255126953125,
|
||
|
|
"signal/advantage_abs_mean": 0.058326976001262666,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.058326976001262666,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10792815685272217,
|
||
|
|
"signal/advantage_std": 0.10792815685272217,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.13490013182163238,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.17295385301113128,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013490013219416142,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013490013219416142,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02310769259929657,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03357519060373306,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002310769259929657,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002310769259929657,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002838134765625,
|
||
|
|
"signal/format_reward/group_std_mean": 0.008286407357081771,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.953125,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014190673828125,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0014190673828125,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16485767662525178,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21537945568561553,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002357464749366045,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002357464749366045,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16485767662525178,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21537945568561553,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002357464749366045,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002357464749366045,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16448655128479003,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2148954153060913,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002352157747372985,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002352157747372985,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14851576387882232,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19453147947788238,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021237753331661223,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021237753331661223,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11294280290603638,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14792169630527496,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016150820534676313,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016150820534676313,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08520928621292115,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11086486130952836,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001218492747284472,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001218492747284472,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16485767662525178,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21537945568561553,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002357464749366045,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002357464749366045,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2837096886454081,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9393985931619993,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9664153325877605,
|
||
|
|
"calibration/confidence_entropy": 0.4499859017812251,
|
||
|
|
"calibration/coverage@0%": 0.04806522137964775,
|
||
|
|
"calibration/coverage@1%": 0.05744022137964775,
|
||
|
|
"calibration/coverage@10%": 0.17310267857142855,
|
||
|
|
"calibration/coverage@15%": 0.21100629892367903,
|
||
|
|
"calibration/coverage@20%": 0.29230140044031316,
|
||
|
|
"calibration/coverage@25%": 0.34975538160469666,
|
||
|
|
"calibration/coverage@30%": 0.4682347725048923,
|
||
|
|
"calibration/coverage@5%": 0.11333552470645794,
|
||
|
|
"calibration/ece": 0.14122779847886976,
|
||
|
|
"calibration/mean_confidence": 0.5043080658845975,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0013671875,
|
||
|
|
"completions/max_length": 940.2,
|
||
|
|
"completions/max_terminated_length": 940.2,
|
||
|
|
"completions/mean_length": 219.98779296875,
|
||
|
|
"completions/mean_terminated_length": 220.29050903320314,
|
||
|
|
"completions/min_length": 20.8,
|
||
|
|
"completions/min_terminated_length": 104.8,
|
||
|
|
"epoch": 0.672,
|
||
|
|
"grad_norm": 0.0007192640914581716,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0012,
|
||
|
|
"num_tokens": 711197985.0,
|
||
|
|
"reward": 0.9461759328842163,
|
||
|
|
"reward_std": 0.07189572900533676,
|
||
|
|
"rewards/accuracy_reward": 0.51611328125,
|
||
|
|
"rewards/brier_reward": 0.7978503227233886,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9457229971885681,
|
||
|
|
"rewards/format_reward": 0.99853515625,
|
||
|
|
"rewards/frontier_coverage_0": 0.15941035151481628,
|
||
|
|
"rewards/frontier_coverage_1": 0.15941035151481628,
|
||
|
|
"rewards/frontier_coverage_10": 0.15893703401088716,
|
||
|
|
"rewards/frontier_coverage_15": 0.14440302103757857,
|
||
|
|
"rewards/frontier_coverage_20": 0.12741003930568695,
|
||
|
|
"rewards/frontier_coverage_25": 0.10461192578077316,
|
||
|
|
"rewards/frontier_coverage_5": 0.15941035151481628,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.096649169921875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.13027789890766145,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0483245849609375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0483245849609375,
|
||
|
|
"signal/advantage_abs_mean": 0.05240143835544586,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05240143835544586,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09864708036184311,
|
||
|
|
"signal/advantage_std": 0.09864708036184311,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1261022225022316,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1641417384147644,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012610222585499287,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012610222585499287,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026672930270433427,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03577820919454098,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002667293045669794,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002667293045669794,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002764892578125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.006545652449131012,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013824462890625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0013824462890625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17697471082210542,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23011261522769927,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025307383853942155,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025307383853942155,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17697471082210542,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23011261522769927,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025307383853942155,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025307383853942155,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1716696798801422,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2236652761697769,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024548764806240795,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024548764806240795,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1511073052883148,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1979276180267334,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002160834474489093,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002160834474489093,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12761921137571336,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16750997304916382,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001824954734183848,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001824954734183848,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09979205876588822,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13040834069252014,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014270264655351639,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014270264655351639,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17697471082210542,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23011261522769927,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025307383853942155,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025307383853942155,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.32691903665475613,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9420609702487284,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9593856095088638,
|
||
|
|
"calibration/confidence_entropy": 0.4599136168799573,
|
||
|
|
"calibration/coverage@0%": 0.010546875,
|
||
|
|
"calibration/coverage@1%": 0.010546875,
|
||
|
|
"calibration/coverage@10%": 0.053125,
|
||
|
|
"calibration/coverage@15%": 0.158203125,
|
||
|
|
"calibration/coverage@20%": 0.364453125,
|
||
|
|
"calibration/coverage@25%": 0.51171875,
|
||
|
|
"calibration/coverage@30%": 0.615234375,
|
||
|
|
"calibration/coverage@5%": 0.01796875,
|
||
|
|
"calibration/ece": 0.13961892604670562,
|
||
|
|
"calibration/mean_confidence": 0.48078719925966695,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00126953125,
|
||
|
|
"completions/max_length": 795.2,
|
||
|
|
"completions/max_terminated_length": 795.2,
|
||
|
|
"completions/mean_length": 223.0029296875,
|
||
|
|
"completions/mean_terminated_length": 223.2849548339844,
|
||
|
|
"completions/min_length": 22.0,
|
||
|
|
"completions/min_terminated_length": 101.4,
|
||
|
|
"epoch": 0.688,
|
||
|
|
"grad_norm": 0.000916535675060004,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0012,
|
||
|
|
"num_tokens": 728435455.0,
|
||
|
|
"reward": 0.9521884322166443,
|
||
|
|
"reward_std": 0.07523634880781174,
|
||
|
|
"rewards/accuracy_reward": 0.53291015625,
|
||
|
|
"rewards/brier_reward": 0.7939005017280578,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9498188853263855,
|
||
|
|
"rewards/format_reward": 0.99873046875,
|
||
|
|
"rewards/frontier_coverage_0": 0.13943086341023445,
|
||
|
|
"rewards/frontier_coverage_1": 0.13943086341023445,
|
||
|
|
"rewards/frontier_coverage_10": 0.13533695340156554,
|
||
|
|
"rewards/frontier_coverage_15": 0.11558721587061882,
|
||
|
|
"rewards/frontier_coverage_20": 0.09408902376890182,
|
||
|
|
"rewards/frontier_coverage_25": 0.07558858171105384,
|
||
|
|
"rewards/frontier_coverage_5": 0.13943086341023445,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.106097412109375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.13813600242137908,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0530487060546875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0530487060546875,
|
||
|
|
"signal/advantage_abs_mean": 0.05597299709916115,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05597299709916115,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10407697558403015,
|
||
|
|
"signal/advantage_std": 0.10407697558403015,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12551425993442536,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1616061270236969,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012551426328718663,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012551426328718663,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023624447733163835,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.032869836688041686,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023624447640031576,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023624447640031576,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002459716796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.007181553123518825,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.959375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17795217037200928,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2277902901172638,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025447160936892033,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025447160936892033,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17795217037200928,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2277902901172638,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025447160936892033,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025447160936892033,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16942830383777618,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21730588376522064,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002422824781388044,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002422824781388044,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14047065675258635,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1811319559812546,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00200873042922467,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00200873042922467,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1157080888748169,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14973447024822234,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016546256374567747,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016546256374567747,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09142259359359742,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11834468692541122,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013073430862277746,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013073430862277746,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17795217037200928,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2277902901172638,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025447160936892033,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025447160936892033,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2512719069451267,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9361608966618207,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9529173332818509,
|
||
|
|
"calibration/confidence_entropy": 0.4415924822970334,
|
||
|
|
"calibration/coverage@0%": 0.01840379302405894,
|
||
|
|
"calibration/coverage@1%": 0.01840379302405894,
|
||
|
|
"calibration/coverage@10%": 0.06894454057307856,
|
||
|
|
"calibration/coverage@15%": 0.18214830709681135,
|
||
|
|
"calibration/coverage@20%": 0.34576391986589156,
|
||
|
|
"calibration/coverage@25%": 0.5378147123575457,
|
||
|
|
"calibration/coverage@30%": 0.7360634874669045,
|
||
|
|
"calibration/coverage@5%": 0.04623161165150992,
|
||
|
|
"calibration/ece": 0.11434603267232472,
|
||
|
|
"calibration/mean_confidence": 0.4953933710343634,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00107421875,
|
||
|
|
"completions/max_length": 581.8,
|
||
|
|
"completions/max_terminated_length": 581.8,
|
||
|
|
"completions/mean_length": 222.84072265625,
|
||
|
|
"completions/mean_terminated_length": 223.0822326660156,
|
||
|
|
"completions/min_length": 21.0,
|
||
|
|
"completions/min_terminated_length": 101.8,
|
||
|
|
"epoch": 0.704,
|
||
|
|
"grad_norm": 0.0006802778807468712,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0008,
|
||
|
|
"num_tokens": 745583488.0,
|
||
|
|
"reward": 0.9577384948730469,
|
||
|
|
"reward_std": 0.06939697116613389,
|
||
|
|
"rewards/accuracy_reward": 0.54189453125,
|
||
|
|
"rewards/brier_reward": 0.8031554341316223,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9526785373687744,
|
||
|
|
"rewards/format_reward": 0.99892578125,
|
||
|
|
"rewards/frontier_coverage_0": 0.1384565994143486,
|
||
|
|
"rewards/frontier_coverage_1": 0.1384565994143486,
|
||
|
|
"rewards/frontier_coverage_10": 0.12583428174257277,
|
||
|
|
"rewards/frontier_coverage_15": 0.10680036693811416,
|
||
|
|
"rewards/frontier_coverage_20": 0.09270759522914887,
|
||
|
|
"rewards/frontier_coverage_25": 0.08056866303086281,
|
||
|
|
"rewards/frontier_coverage_5": 0.1385010600090027,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.086895751953125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1153362974524498,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0434478759765625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0434478759765625,
|
||
|
|
"signal/advantage_abs_mean": 0.05145658850669861,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05145658850669861,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09849272668361664,
|
||
|
|
"signal/advantage_std": 0.09849272668361664,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12422804683446884,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1597428023815155,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012422805279493332,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012422805279493332,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021860988438129426,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03040802404284477,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021860988344997168,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021860988344997168,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002069091796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005740390345454216,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16862587928771972,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21537420451641082,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002411350002512336,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002411350002512336,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16862587928771972,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21537420451641082,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002411350002512336,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002411350002512336,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1515301063656807,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19380762279033661,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021668805042281748,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021668805042281748,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12467771619558335,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15953091979026796,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017828913405537605,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017828913405537605,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09930351227521897,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12707742750644685,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001420040219090879,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001420040219090879,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07597752884030343,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09732898026704788,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010864786920137704,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010864786920137704,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16813057661056519,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21476930379867554,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024042673408985137,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024042673408985137,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.22253628887566307,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9450462719837344,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9572577058713276,
|
||
|
|
"calibration/confidence_entropy": 0.4206489216290737,
|
||
|
|
"calibration/coverage@0%": 0.06993792808219178,
|
||
|
|
"calibration/coverage@1%": 0.08751605308219178,
|
||
|
|
"calibration/coverage@10%": 0.26743364726027397,
|
||
|
|
"calibration/coverage@15%": 0.4586464958414872,
|
||
|
|
"calibration/coverage@20%": 0.5466005687377691,
|
||
|
|
"calibration/coverage@25%": 0.6150004586594913,
|
||
|
|
"calibration/coverage@30%": 0.6967007093933464,
|
||
|
|
"calibration/coverage@5%": 0.16965967465753423,
|
||
|
|
"calibration/ece": 0.14508923354516995,
|
||
|
|
"calibration/mean_confidence": 0.5053411371799115,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00146484375,
|
||
|
|
"completions/max_length": 632.6,
|
||
|
|
"completions/max_terminated_length": 632.6,
|
||
|
|
"completions/mean_length": 222.9833984375,
|
||
|
|
"completions/mean_terminated_length": 223.31114196777344,
|
||
|
|
"completions/min_length": 19.8,
|
||
|
|
"completions/min_terminated_length": 104.2,
|
||
|
|
"epoch": 0.72,
|
||
|
|
"grad_norm": 0.0008460658136755228,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0008,
|
||
|
|
"num_tokens": 762876694.0,
|
||
|
|
"reward": 0.9683452606201172,
|
||
|
|
"reward_std": 0.0708312913775444,
|
||
|
|
"rewards/accuracy_reward": 0.56240234375,
|
||
|
|
"rewards/brier_reward": 0.8110374331474304,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9519586086273193,
|
||
|
|
"rewards/format_reward": 0.9984375,
|
||
|
|
"rewards/frontier_coverage_0": 0.1352271929383278,
|
||
|
|
"rewards/frontier_coverage_1": 0.1352271929383278,
|
||
|
|
"rewards/frontier_coverage_10": 0.1254192978143692,
|
||
|
|
"rewards/frontier_coverage_15": 0.111299267411232,
|
||
|
|
"rewards/frontier_coverage_20": 0.08802737891674042,
|
||
|
|
"rewards/frontier_coverage_25": 0.08286877870559692,
|
||
|
|
"rewards/frontier_coverage_5": 0.134917189180851,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088519287109375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.12481878697872162,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442596435546875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0442596435546875,
|
||
|
|
"signal/advantage_abs_mean": 0.04948367401957512,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.04948367401957512,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09783572107553482,
|
||
|
|
"signal/advantage_std": 0.09783572107553482,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.11841019541025162,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.154974827170372,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011841019801795482,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011841019801795482,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02254415713250637,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.032048237323760984,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022544157691299915,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022544157691299915,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0030029296875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.008166217897087335,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.95625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00150146484375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00150146484375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1640920639038086,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21496865749359131,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023465165868401527,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023465165868401527,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1640920639038086,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21496865749359131,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023465165868401527,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023465165868401527,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14849277436733246,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19446865618228912,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002123446692712605,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002123446692712605,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1182610735297203,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1555154412984848,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016911332961171866,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016911332961171866,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08573998808860779,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11260216832160949,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001226081815548241,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001226081815548241,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06338882744312287,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08182145059108734,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009064602083526552,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009064602083526552,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1630953937768936,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2136875331401825,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002332264045253396,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002332264045253396,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.24824451300334033,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9365923239236968,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.963191063037019,
|
||
|
|
"calibration/confidence_entropy": 0.4336782707003815,
|
||
|
|
"calibration/coverage@0%": 0.022297794117647058,
|
||
|
|
"calibration/coverage@1%": 0.022297794117647058,
|
||
|
|
"calibration/coverage@10%": 0.08643829920666897,
|
||
|
|
"calibration/coverage@15%": 0.30066691188461686,
|
||
|
|
"calibration/coverage@20%": 0.46180249282932345,
|
||
|
|
"calibration/coverage@25%": 0.5868881812142664,
|
||
|
|
"calibration/coverage@30%": 0.672549124529949,
|
||
|
|
"calibration/coverage@5%": 0.03950287606480948,
|
||
|
|
"calibration/ece": 0.13127255862630277,
|
||
|
|
"calibration/mean_confidence": 0.5733970891471901,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00126953125,
|
||
|
|
"completions/max_length": 864.6,
|
||
|
|
"completions/max_terminated_length": 864.6,
|
||
|
|
"completions/mean_length": 223.4474609375,
|
||
|
|
"completions/mean_terminated_length": 223.73309936523438,
|
||
|
|
"completions/min_length": 19.6,
|
||
|
|
"completions/min_terminated_length": 95.0,
|
||
|
|
"epoch": 0.736,
|
||
|
|
"grad_norm": 0.0006176315364427865,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0009,
|
||
|
|
"num_tokens": 780104380.0,
|
||
|
|
"reward": 0.9651668429374695,
|
||
|
|
"reward_std": 0.0696649581193924,
|
||
|
|
"rewards/accuracy_reward": 0.56181640625,
|
||
|
|
"rewards/brier_reward": 0.7952903747558594,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9530894517898559,
|
||
|
|
"rewards/format_reward": 0.99873046875,
|
||
|
|
"rewards/frontier_coverage_0": 0.1159910187125206,
|
||
|
|
"rewards/frontier_coverage_1": 0.1159910187125206,
|
||
|
|
"rewards/frontier_coverage_10": 0.10471980273723602,
|
||
|
|
"rewards/frontier_coverage_15": 0.08863085955381393,
|
||
|
|
"rewards/frontier_coverage_20": 0.0780431255698204,
|
||
|
|
"rewards/frontier_coverage_25": 0.08450771719217301,
|
||
|
|
"rewards/frontier_coverage_5": 0.11529324352741241,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085174560546875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.11592481285333633,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0425872802734375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0425872802734375,
|
||
|
|
"signal/advantage_abs_mean": 0.05093270391225815,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05093270391225815,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09835466593503953,
|
||
|
|
"signal/advantage_std": 0.09835466593503953,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12378608733415604,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16011227071285247,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01237860918045044,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01237860918045044,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022135768830776215,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03068559318780899,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002213576971553266,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002213576971553266,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002423095703125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.006172627722844481,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012115478515625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012115478515625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15735355913639068,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20486867129802705,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002250155946239829,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002250155946239829,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15735355913639068,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20486867129802705,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002250155946239829,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002250155946239829,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13432896435260772,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1756158649921417,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019209041725844144,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019209041725844144,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10221495479345322,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13430811911821366,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014616738073527813,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014616738073527813,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0775189757347107,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10161522030830383,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011085212929174304,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011085212929174304,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.069071663916111,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0897357627749443,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009877247619442641,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009877247619442641,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1556430786848068,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2026938110589981,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002225696016103029,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002225696016103029,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.26965961509465136,
|
||
|
|
"calibration/batch_distribution_entropy": 0.931343074157844,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9534197325324987,
|
||
|
|
"calibration/confidence_entropy": 0.42210382299186167,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.128515625,
|
||
|
|
"calibration/coverage@15%": 0.20675551470588233,
|
||
|
|
"calibration/coverage@20%": 0.4109256557931392,
|
||
|
|
"calibration/coverage@25%": 0.5564118678293235,
|
||
|
|
"calibration/coverage@30%": 0.6553425556866582,
|
||
|
|
"calibration/coverage@5%": 0.06171875,
|
||
|
|
"calibration/ece": 0.10822966591979921,
|
||
|
|
"calibration/mean_confidence": 0.4941520088048591,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00107421875,
|
||
|
|
"completions/max_length": 893.4,
|
||
|
|
"completions/max_terminated_length": 893.4,
|
||
|
|
"completions/mean_length": 220.48369140625,
|
||
|
|
"completions/mean_terminated_length": 220.721337890625,
|
||
|
|
"completions/min_length": 20.4,
|
||
|
|
"completions/min_terminated_length": 102.2,
|
||
|
|
"epoch": 0.752,
|
||
|
|
"grad_norm": 0.000702829216606915,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0009,
|
||
|
|
"num_tokens": 797589333.0,
|
||
|
|
"reward": 0.966651451587677,
|
||
|
|
"reward_std": 0.06932897940278053,
|
||
|
|
"rewards/accuracy_reward": 0.56298828125,
|
||
|
|
"rewards/brier_reward": 0.7999367952346802,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9526612877845764,
|
||
|
|
"rewards/format_reward": 0.99892578125,
|
||
|
|
"rewards/frontier_coverage_0": 0.1230026513338089,
|
||
|
|
"rewards/frontier_coverage_1": 0.1230026513338089,
|
||
|
|
"rewards/frontier_coverage_10": 0.1173777550458908,
|
||
|
|
"rewards/frontier_coverage_15": 0.09979364722967148,
|
||
|
|
"rewards/frontier_coverage_20": 0.07793587669730187,
|
||
|
|
"rewards/frontier_coverage_25": 0.06709063202142715,
|
||
|
|
"rewards/frontier_coverage_5": 0.12149005383253098,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.081842041015625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.11334883570671081,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409210205078125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0409210205078125,
|
||
|
|
"signal/advantage_abs_mean": 0.049368849396705626,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.049368849396705626,
|
||
|
|
"signal/advantage_pre_scale_std": 0.0992576465010643,
|
||
|
|
"signal/advantage_std": 0.0992576465010643,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1113414391875267,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.14791098535060881,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01113414391875267,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01113414391875267,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02176951803267002,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030010972917079926,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021769518963992594,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021769518963992594,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002069091796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005740390298888088,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14007504731416703,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1843056410551071,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002003073110245168,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002003073110245168,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14007504731416703,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1843056410551071,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002003073110245168,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002003073110245168,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12636134028434753,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16672152578830718,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018069671699777246,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018069671699777246,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10274002999067307,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13605080544948578,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014691824093461038,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014691824093461038,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07285871803760528,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09634722769260406,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010418796446174383,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010418796446174383,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05874115601181984,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07601820230484009,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008399985264986754,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008399985264986754,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13739684820175171,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18090024590492249,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001964774914085865,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001964774914085865,
|
||
|
|
"step": 235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.2754913190231371,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9653659337495298,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9563910168815924,
|
||
|
|
"calibration/confidence_entropy": 0.46569203051240315,
|
||
|
|
"calibration/coverage@0%": 0.05728876142194554,
|
||
|
|
"calibration/coverage@1%": 0.05728876142194554,
|
||
|
|
"calibration/coverage@10%": 0.22487203881727966,
|
||
|
|
"calibration/coverage@15%": 0.29580118904764474,
|
||
|
|
"calibration/coverage@20%": 0.3658782739379324,
|
||
|
|
"calibration/coverage@25%": 0.4399407606957949,
|
||
|
|
"calibration/coverage@30%": 0.5046889265393624,
|
||
|
|
"calibration/coverage@5%": 0.14967154683575512,
|
||
|
|
"calibration/ece": 0.1645633654886728,
|
||
|
|
"calibration/mean_confidence": 0.517807627513297,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00107421875,
|
||
|
|
"completions/max_length": 835.2,
|
||
|
|
"completions/max_terminated_length": 835.2,
|
||
|
|
"completions/mean_length": 225.77333984375,
|
||
|
|
"completions/mean_terminated_length": 226.01569519042968,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 107.8,
|
||
|
|
"epoch": 0.768,
|
||
|
|
"grad_norm": 0.0008803294622339308,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0002,
|
||
|
|
"num_tokens": 814833956.0,
|
||
|
|
"reward": 0.9462002873420715,
|
||
|
|
"reward_std": 0.07274475544691086,
|
||
|
|
"rewards/accuracy_reward": 0.51416015625,
|
||
|
|
"rewards/brier_reward": 0.8063553452491761,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9532873511314393,
|
||
|
|
"rewards/format_reward": 0.99892578125,
|
||
|
|
"rewards/frontier_coverage_0": 0.1558481901884079,
|
||
|
|
"rewards/frontier_coverage_1": 0.1558481901884079,
|
||
|
|
"rewards/frontier_coverage_10": 0.1498277723789215,
|
||
|
|
"rewards/frontier_coverage_15": 0.1354019284248352,
|
||
|
|
"rewards/frontier_coverage_20": 0.11420131176710129,
|
||
|
|
"rewards/frontier_coverage_25": 0.09058187007904053,
|
||
|
|
"rewards/frontier_coverage_5": 0.1558481901884079,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088494873046875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.11916659921407699,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442474365234375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0442474365234375,
|
||
|
|
"signal/advantage_abs_mean": 0.05352484881877899,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05352484881877899,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1029602348804474,
|
||
|
|
"signal/advantage_std": 0.1029602348804474,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.11680269986391068,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.15063838958740233,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011680270358920098,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011680270358920098,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021855100244283675,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.029894111678004265,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021855100989341737,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021855100989341737,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002069091796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005740390345454216,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1492400586605072,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1952953338623047,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002134132944047451,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002134132944047451,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1492400586605072,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1952953338623047,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002134132944047451,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002134132944047451,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14196249544620515,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18581181466579438,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002030063676647842,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002030063676647842,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11799918264150619,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15506875813007354,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016873883083462714,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016873883083462714,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09261109083890914,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12177760004997254,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013243386289104818,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013243386289104818,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06712948903441429,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0871183454990387,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009599516983143985,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009599516983143985,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1492400586605072,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1952953338623047,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002134132944047451,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002134132944047451,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.34061015213100465,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9248090393225793,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.966630339904,
|
||
|
|
"calibration/confidence_entropy": 0.41748047920813597,
|
||
|
|
"calibration/coverage@0%": 0.009379592590460843,
|
||
|
|
"calibration/coverage@1%": 0.009379592590460843,
|
||
|
|
"calibration/coverage@10%": 0.15122242347281378,
|
||
|
|
"calibration/coverage@15%": 0.2530637224924216,
|
||
|
|
"calibration/coverage@20%": 0.31062806072771576,
|
||
|
|
"calibration/coverage@25%": 0.3466390901394804,
|
||
|
|
"calibration/coverage@30%": 0.37598804847281375,
|
||
|
|
"calibration/coverage@5%": 0.09453584259046084,
|
||
|
|
"calibration/ece": 0.15827124658513475,
|
||
|
|
"calibration/mean_confidence": 0.5404969754414958,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 775.4,
|
||
|
|
"completions/max_terminated_length": 775.4,
|
||
|
|
"completions/mean_length": 225.9041015625,
|
||
|
|
"completions/mean_terminated_length": 226.12729187011718,
|
||
|
|
"completions/min_length": 20.0,
|
||
|
|
"completions/min_terminated_length": 106.8,
|
||
|
|
"epoch": 0.784,
|
||
|
|
"grad_norm": 0.0007201886037364602,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0007,
|
||
|
|
"num_tokens": 832321582.0,
|
||
|
|
"reward": 0.9589765906333924,
|
||
|
|
"reward_std": 0.07071957588195801,
|
||
|
|
"rewards/accuracy_reward": 0.5533203125,
|
||
|
|
"rewards/brier_reward": 0.7801745533943176,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9526117444038391,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.10734488293528557,
|
||
|
|
"rewards/frontier_coverage_1": 0.10734488293528557,
|
||
|
|
"rewards/frontier_coverage_10": 0.10519935935735703,
|
||
|
|
"rewards/frontier_coverage_15": 0.09717852100729943,
|
||
|
|
"rewards/frontier_coverage_20": 0.07842598631978034,
|
||
|
|
"rewards/frontier_coverage_25": 0.06332094371318817,
|
||
|
|
"rewards/frontier_coverage_5": 0.10734488293528557,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0907470703125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1225023627281189,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04537353515625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04537353515625,
|
||
|
|
"signal/advantage_abs_mean": 0.052248618006706236,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.052248618006706236,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09925459623336792,
|
||
|
|
"signal/advantage_std": 0.09925459623336792,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12520308941602706,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.16114262938499452,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01252030897885561,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01252030897885561,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022027900069952012,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030153784900903702,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022027899976819754,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022027899976819754,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0018798828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005187963135540485,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15730546414852142,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20497536659240723,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022494681645184754,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022494681645184754,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15730546414852142,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20497536659240723,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022494681645184754,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022494681645184754,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15118659734725953,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19713300466537476,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002161968289874494,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002161968289874494,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14073525965213776,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18378305733203887,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020125140668824314,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020125140668824314,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.100088232755661,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13115270733833312,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001431261678226292,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001431261678226292,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07233644723892212,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09395631700754166,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010344112175516783,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010344112175516783,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15730546414852142,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20497536659240723,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022494681645184754,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022494681645184754,
|
||
|
|
"step": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.188944295291611,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9274885575581143,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.953439624363039,
|
||
|
|
"calibration/confidence_entropy": 0.4285214943213724,
|
||
|
|
"calibration/coverage@0%": 0.07191398605675146,
|
||
|
|
"calibration/coverage@1%": 0.07191398605675146,
|
||
|
|
"calibration/coverage@10%": 0.3349567331213307,
|
||
|
|
"calibration/coverage@15%": 0.5131688784246575,
|
||
|
|
"calibration/coverage@20%": 0.6077322345890411,
|
||
|
|
"calibration/coverage@25%": 0.6776793358610568,
|
||
|
|
"calibration/coverage@30%": 0.7593390716731898,
|
||
|
|
"calibration/coverage@5%": 0.15942621697651665,
|
||
|
|
"calibration/ece": 0.11407683786445302,
|
||
|
|
"calibration/mean_confidence": 0.5089776058348807,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.000390625,
|
||
|
|
"completions/max_length": 795.8,
|
||
|
|
"completions/max_terminated_length": 795.8,
|
||
|
|
"completions/mean_length": 225.09853515625,
|
||
|
|
"completions/mean_terminated_length": 225.18656005859376,
|
||
|
|
"completions/min_length": 24.2,
|
||
|
|
"completions/min_terminated_length": 113.8,
|
||
|
|
"epoch": 0.8,
|
||
|
|
"grad_norm": 0.0007698666886426508,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0001,
|
||
|
|
"num_tokens": 849637151.0,
|
||
|
|
"reward": 0.9806387066841126,
|
||
|
|
"reward_std": 0.06646973639726639,
|
||
|
|
"rewards/accuracy_reward": 0.589453125,
|
||
|
|
"rewards/brier_reward": 0.8106509804725647,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9531700253486634,
|
||
|
|
"rewards/format_reward": 0.999609375,
|
||
|
|
"rewards/frontier_coverage_0": 0.11020932197570801,
|
||
|
|
"rewards/frontier_coverage_1": 0.11020932197570801,
|
||
|
|
"rewards/frontier_coverage_10": 0.10672755688428878,
|
||
|
|
"rewards/frontier_coverage_15": 0.09707566052675247,
|
||
|
|
"rewards/frontier_coverage_20": 0.07785675972700119,
|
||
|
|
"rewards/frontier_coverage_25": 0.06814835816621781,
|
||
|
|
"rewards/frontier_coverage_5": 0.10986628532409667,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08389892578125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1153394877910614,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041949462890625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.041949462890625,
|
||
|
|
"signal/advantage_abs_mean": 0.04819626733660698,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.04819626733660698,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09706049710512162,
|
||
|
|
"signal/advantage_std": 0.09706049710512162,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.10596445500850678,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13802684843540192,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010596446134150029,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010596446134150029,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021166018024086953,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027424711734056473,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021166018676012756,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021166018676012756,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1317434698343277,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17327735126018523,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001883931620977819,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001883931620977819,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1317434698343277,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17327735126018523,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001883931620977819,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001883931620977819,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12727195620536805,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16740552484989166,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018199889454990625,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018199889454990625,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11404764950275421,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15062055885791778,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016308813821524382,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016308813821524382,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08656607568264008,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11483617275953292,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012378948042169214,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012378948042169214,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.068538336455822,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0897128164768219,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009800982195883989,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009800982195883989,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1310385376214981,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17232318818569184,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018738510785624384,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018738510785624384,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8,
|
||
|
|
"eval_calibration/aurc": 0.4476727729252362,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.8957618944466985,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.9420687234711133,
|
||
|
|
"eval_calibration/confidence_entropy": 0.43445973687780964,
|
||
|
|
"eval_calibration/coverage@0%": 0.109375,
|
||
|
|
"eval_calibration/coverage@1%": 0.109375,
|
||
|
|
"eval_calibration/coverage@10%": 0.109375,
|
||
|
|
"eval_calibration/coverage@15%": 0.125,
|
||
|
|
"eval_calibration/coverage@20%": 0.1640625,
|
||
|
|
"eval_calibration/coverage@25%": 0.265625,
|
||
|
|
"eval_calibration/coverage@30%": 0.28125,
|
||
|
|
"eval_calibration/coverage@5%": 0.109375,
|
||
|
|
"eval_calibration/ece": 0.2315625,
|
||
|
|
"eval_calibration/mean_confidence": 0.50203125,
|
||
|
|
"eval_completions/clipped_ratio": 0.0,
|
||
|
|
"eval_completions/max_length": 425.75,
|
||
|
|
"eval_completions/max_terminated_length": 425.75,
|
||
|
|
"eval_completions/mean_length": 232.50202178955078,
|
||
|
|
"eval_completions/mean_terminated_length": 232.50202178955078,
|
||
|
|
"eval_completions/min_length": 130.0,
|
||
|
|
"eval_completions/min_terminated_length": 130.0,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 849637151.0,
|
||
|
|
"eval_reward": 0.8972103148698807,
|
||
|
|
"eval_reward_std": 0.2323836162686348,
|
||
|
|
"eval_rewards/accuracy_reward": 0.419921875,
|
||
|
|
"eval_rewards/brier_reward": 0.7953435629606247,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.896728515625,
|
||
|
|
"eval_rewards/format_reward": 1.0,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.21285292878746986,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.21285292878746986,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.20865214988589287,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.19585801288485527,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.14357871003448963,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.07651386596262455,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.21138105168938637,
|
||
|
|
"eval_runtime": 21.2115,
|
||
|
|
"eval_samples_per_second": 23.572,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49325957894325256,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.21680431440472603,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21680431440472603,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.22975903004407883,
|
||
|
|
"eval_signal/advantage_std": 0.22975903004407883,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2153194323182106,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.2603638060390949,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021531942766159773,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021531942766159773,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0452117919921875,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.053748167119920254,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0045211793622002006,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045211793622002006,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.36268793791532516,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4430076330900192,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005186437512747943,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005186437512747943,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36268793791532516,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4430076330900192,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005186437512747943,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005186437512747943,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3564353659749031,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4355946108698845,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0050970257725566626,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0050970257725566626,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3375644385814667,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.41321366280317307,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0048271710984408855,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0048271710984408855,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.24088909849524498,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.2981100380420685,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034447142388671637,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034447142388671637,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.11748435348272324,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.14750002324581146,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016800262674223632,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016800262674223632,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36068040132522583,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4406013935804367,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005157729727216065,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005157729727216065,
|
||
|
|
"eval_steps_per_second": 0.189,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.23505807779794913,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9013709907814249,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9409831599796826,
|
||
|
|
"calibration/confidence_entropy": 0.40754073413461833,
|
||
|
|
"calibration/coverage@0%": 0.021875,
|
||
|
|
"calibration/coverage@1%": 0.021875,
|
||
|
|
"calibration/coverage@10%": 0.08359375,
|
||
|
|
"calibration/coverage@15%": 0.207421875,
|
||
|
|
"calibration/coverage@20%": 0.3484451443248532,
|
||
|
|
"calibration/coverage@25%": 0.6675238502935421,
|
||
|
|
"calibration/coverage@30%": 0.7984306201076321,
|
||
|
|
"calibration/coverage@5%": 0.062890625,
|
||
|
|
"calibration/ece": 0.13932734169649402,
|
||
|
|
"calibration/mean_confidence": 0.5523449613856978,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00078125,
|
||
|
|
"completions/max_length": 635.6,
|
||
|
|
"completions/max_terminated_length": 635.6,
|
||
|
|
"completions/mean_length": 222.9716796875,
|
||
|
|
"completions/mean_terminated_length": 223.14452514648437,
|
||
|
|
"completions/min_length": 22.2,
|
||
|
|
"completions/min_terminated_length": 112.4,
|
||
|
|
"epoch": 0.816,
|
||
|
|
"grad_norm": 0.0009446038166061044,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0002,
|
||
|
|
"num_tokens": 867019549.0,
|
||
|
|
"reward": 0.9719715356826782,
|
||
|
|
"reward_std": 0.07123408019542694,
|
||
|
|
"rewards/accuracy_reward": 0.58046875,
|
||
|
|
"rewards/brier_reward": 0.785218346118927,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9532184958457947,
|
||
|
|
"rewards/format_reward": 0.99921875,
|
||
|
|
"rewards/frontier_coverage_0": 0.09102783054113388,
|
||
|
|
"rewards/frontier_coverage_1": 0.09102783054113388,
|
||
|
|
"rewards/frontier_coverage_10": 0.08938535004854202,
|
||
|
|
"rewards/frontier_coverage_15": 0.08647352084517479,
|
||
|
|
"rewards/frontier_coverage_20": 0.07352498024702073,
|
||
|
|
"rewards/frontier_coverage_25": 0.05705418214201927,
|
||
|
|
"rewards/frontier_coverage_5": 0.09081372916698456,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09835205078125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1273781567811966,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049176025390625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049176025390625,
|
||
|
|
"signal/advantage_abs_mean": 0.05432458594441414,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05432458594441414,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10411206483840943,
|
||
|
|
"signal/advantage_std": 0.10411206483840943,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.12405794858932495,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.15806553959846498,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012405795231461524,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012405795231461524,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021034000813961028,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02808857224881649,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021034001372754576,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021034001372754576,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00147705078125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0036875875666737556,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000738525390625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000738525390625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15168525874614716,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19630924761295318,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00216909924056381,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00216909924056381,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15168525874614716,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19630924761295318,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00216909924056381,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00216909924056381,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14648787081241607,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1897138088941574,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002094776462763548,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002094776462763548,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1395682379603386,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18085283041000366,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001995825790800154,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001995825790800154,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10575809627771378,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13796985149383545,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015123408054932951,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015123408054932951,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06511625275015831,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08438750207424164,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000931162410415709,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000931162410415709,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15083783268928527,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1952424615621567,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021569809876382353,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021569809876382353,
|
||
|
|
"step": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.270215333264285,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9305754323811246,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9413314910446944,
|
||
|
|
"calibration/confidence_entropy": 0.4391170086291357,
|
||
|
|
"calibration/coverage@0%": 0.044930283757338554,
|
||
|
|
"calibration/coverage@1%": 0.06524278375733855,
|
||
|
|
"calibration/coverage@10%": 0.22700587084148727,
|
||
|
|
"calibration/coverage@15%": 0.29698125611545984,
|
||
|
|
"calibration/coverage@20%": 0.33645731409001955,
|
||
|
|
"calibration/coverage@25%": 0.4197628730430528,
|
||
|
|
"calibration/coverage@30%": 0.5198783023483367,
|
||
|
|
"calibration/coverage@5%": 0.18008653375733857,
|
||
|
|
"calibration/ece": 0.1048740737634805,
|
||
|
|
"calibration/mean_confidence": 0.49940577494659866,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00107421875,
|
||
|
|
"completions/max_length": 799.0,
|
||
|
|
"completions/max_terminated_length": 799.0,
|
||
|
|
"completions/mean_length": 226.58212890625,
|
||
|
|
"completions/mean_terminated_length": 226.82774963378907,
|
||
|
|
"completions/min_length": 20.6,
|
||
|
|
"completions/min_terminated_length": 109.0,
|
||
|
|
"epoch": 0.832,
|
||
|
|
"grad_norm": 0.000873431155923754,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0004,
|
||
|
|
"num_tokens": 884348102.0,
|
||
|
|
"reward": 0.9639191865921021,
|
||
|
|
"reward_std": 0.0691076509654522,
|
||
|
|
"rewards/accuracy_reward": 0.55126953125,
|
||
|
|
"rewards/brier_reward": 0.8154711127281189,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9510850787162781,
|
||
|
|
"rewards/format_reward": 0.998828125,
|
||
|
|
"rewards/frontier_coverage_0": 0.14494312703609466,
|
||
|
|
"rewards/frontier_coverage_1": 0.14494312703609466,
|
||
|
|
"rewards/frontier_coverage_10": 0.13486984968185425,
|
||
|
|
"rewards/frontier_coverage_15": 0.11929207742214203,
|
||
|
|
"rewards/frontier_coverage_20": 0.09058420956134797,
|
||
|
|
"rewards/frontier_coverage_25": 0.07505071610212326,
|
||
|
|
"rewards/frontier_coverage_5": 0.1444932848215103,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091925048828125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.12147300839424133,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459625244140625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0459625244140625,
|
||
|
|
"signal/advantage_abs_mean": 0.05150103196501732,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05150103196501732,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10343928039073944,
|
||
|
|
"signal/advantage_std": 0.10343928039073944,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.10446172952651978,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13371139168739318,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010446173511445523,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010446173511445523,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02175750322639942,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030089304223656653,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021757503971457483,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021757503971457483,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0022216796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005560987768694759,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00111083984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00111083984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1405455082654953,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18188858330249785,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020098007284104825,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020098007284104825,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1405455082654953,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18188858330249785,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020098007284104825,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020098007284104825,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12980564832687377,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16831763684749604,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001856220792979002,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001856220792979002,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1126504197716713,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14668649286031724,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016109010437503457,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016109010437503457,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0767782062292099,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10044772624969482,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010979283368214964,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010979283368214964,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05782742351293564,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0740132749080658,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008269321522675454,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008269321522675454,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13997873961925505,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1811675101518631,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002001695986837149,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002001695986837149,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.31580053039415606,
|
||
|
|
"calibration/batch_distribution_entropy": 0.943326690831765,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9552051158612647,
|
||
|
|
"calibration/confidence_entropy": 0.46298817442665785,
|
||
|
|
"calibration/coverage@0%": 0.040234375,
|
||
|
|
"calibration/coverage@1%": 0.040234375,
|
||
|
|
"calibration/coverage@10%": 0.187109375,
|
||
|
|
"calibration/coverage@15%": 0.234765625,
|
||
|
|
"calibration/coverage@20%": 0.415625,
|
||
|
|
"calibration/coverage@25%": 0.49028963040275053,
|
||
|
|
"calibration/coverage@30%": 0.5403586996561887,
|
||
|
|
"calibration/coverage@5%": 0.110546875,
|
||
|
|
"calibration/ece": 0.16090085641235513,
|
||
|
|
"calibration/mean_confidence": 0.5592939710679985,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 719.6,
|
||
|
|
"completions/max_terminated_length": 719.6,
|
||
|
|
"completions/mean_length": 223.590625,
|
||
|
|
"completions/mean_terminated_length": 223.80735473632814,
|
||
|
|
"completions/min_length": 23.6,
|
||
|
|
"completions/min_terminated_length": 108.6,
|
||
|
|
"epoch": 0.848,
|
||
|
|
"grad_norm": 0.0008446628926321864,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0002,
|
||
|
|
"num_tokens": 901652038.0,
|
||
|
|
"reward": 0.9539282083511352,
|
||
|
|
"reward_std": 0.06537192910909653,
|
||
|
|
"rewards/accuracy_reward": 0.5345703125,
|
||
|
|
"rewards/brier_reward": 0.8037230730056762,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9539141178131103,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.13736073225736617,
|
||
|
|
"rewards/frontier_coverage_1": 0.13736073225736617,
|
||
|
|
"rewards/frontier_coverage_10": 0.12592306435108186,
|
||
|
|
"rewards/frontier_coverage_15": 0.10868992656469345,
|
||
|
|
"rewards/frontier_coverage_20": 0.08467617332935333,
|
||
|
|
"rewards/frontier_coverage_25": 0.06507683843374253,
|
||
|
|
"rewards/frontier_coverage_5": 0.1358505055308342,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0785888671875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.10885387361049652,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03929443359375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03929443359375,
|
||
|
|
"signal/advantage_abs_mean": 0.04755012765526771,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.04755012765526771,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09628051966428756,
|
||
|
|
"signal/advantage_std": 0.09628051966428756,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1051436722278595,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13542101085186004,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010514367558062076,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010514367558062076,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021031123772263526,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028871718794107437,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021031123818829657,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021031123818829657,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.0018798828125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005187963135540485,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13309673368930816,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17585844099521636,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019032832700759173,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019032832700759173,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13309673368930816,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17585844099521636,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019032832700759173,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019032832700759173,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12269736975431442,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16210621297359468,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017545723589137197,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017545723589137197,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10646263808012009,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14086052179336547,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015224156668409705,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015224156668409705,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08161805719137191,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10817753225564956,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011671381769701838,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011671381769701838,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05570452064275742,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07342620790004731,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007965746102854609,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007965746102854609,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13184687048196791,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1741828888654709,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018854103051126002,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018854103051126002,
|
||
|
|
"step": 265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.26603775459386025,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9328918619130213,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9612254715420541,
|
||
|
|
"calibration/confidence_entropy": 0.47528040756113155,
|
||
|
|
"calibration/coverage@0%": 0.034765625,
|
||
|
|
"calibration/coverage@1%": 0.034765625,
|
||
|
|
"calibration/coverage@10%": 0.15595110689823874,
|
||
|
|
"calibration/coverage@15%": 0.24117080479452055,
|
||
|
|
"calibration/coverage@20%": 0.33809243517612525,
|
||
|
|
"calibration/coverage@25%": 0.4576481470156556,
|
||
|
|
"calibration/coverage@30%": 0.5338490704500979,
|
||
|
|
"calibration/coverage@5%": 0.04765625,
|
||
|
|
"calibration/ece": 0.13823007154421446,
|
||
|
|
"calibration/mean_confidence": 0.6021676149102527,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00126953125,
|
||
|
|
"completions/max_length": 696.2,
|
||
|
|
"completions/max_terminated_length": 696.2,
|
||
|
|
"completions/mean_length": 220.351953125,
|
||
|
|
"completions/mean_terminated_length": 220.63258666992186,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 103.2,
|
||
|
|
"epoch": 0.864,
|
||
|
|
"grad_norm": 0.0010584808187559247,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0011,
|
||
|
|
"num_tokens": 918895258.0,
|
||
|
|
"reward": 0.9751318335533142,
|
||
|
|
"reward_std": 0.0712385781109333,
|
||
|
|
"rewards/accuracy_reward": 0.5873046875,
|
||
|
|
"rewards/brier_reward": 0.7925786375999451,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9517351508140564,
|
||
|
|
"rewards/format_reward": 0.99873046875,
|
||
|
|
"rewards/frontier_coverage_0": 0.0879151001572609,
|
||
|
|
"rewards/frontier_coverage_1": 0.0879151001572609,
|
||
|
|
"rewards/frontier_coverage_10": 0.08317596241831779,
|
||
|
|
"rewards/frontier_coverage_15": 0.07355262599885463,
|
||
|
|
"rewards/frontier_coverage_20": 0.060579386353492734,
|
||
|
|
"rewards/frontier_coverage_25": 0.056345708668231964,
|
||
|
|
"rewards/frontier_coverage_5": 0.08777875155210495,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09532470703125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1278452306985855,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047662353515625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047662353515625,
|
||
|
|
"signal/advantage_abs_mean": 0.052195066958665846,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.052195066958665846,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10198588073253631,
|
||
|
|
"signal/advantage_std": 0.10198588073253631,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.11365769803524017,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.14665003418922423,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011365770548582076,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011365770548582076,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022322241216897964,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.031285477429628374,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022322241216897964,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022322241216897964,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002435302734375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.006508936360478401,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012176513671875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012176513671875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14946494698524476,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1939655214548111,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002137348777614534,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002137348777614534,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14946494698524476,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1939655214548111,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002137348777614534,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002137348777614534,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13306838274002075,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17288758158683776,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019028778653591872,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019028778653591872,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11056115180253982,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14380019903182983,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015810244716703893,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015810244716703893,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08252616226673126,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10731232017278672,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011801241431385278,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011801241431385278,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06337658017873764,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08188621997833252,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000906285154633224,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000906285154633224,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14790982007980347,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19197991490364075,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002115110377781093,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002115110377781093,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3599516688133769,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9434260369168366,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9574774690861215,
|
||
|
|
"calibration/confidence_entropy": 0.437518390452558,
|
||
|
|
"calibration/coverage@0%": 0.005484059634127623,
|
||
|
|
"calibration/coverage@1%": 0.005484059634127623,
|
||
|
|
"calibration/coverage@10%": 0.05318469468266759,
|
||
|
|
"calibration/coverage@15%": 0.11540664210889835,
|
||
|
|
"calibration/coverage@20%": 0.16473415436380032,
|
||
|
|
"calibration/coverage@25%": 0.25131654159951655,
|
||
|
|
"calibration/coverage@30%": 0.33701887518706114,
|
||
|
|
"calibration/coverage@5%": 0.005484059634127623,
|
||
|
|
"calibration/ece": 0.1427166164579369,
|
||
|
|
"calibration/mean_confidence": 0.5087113572934878,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00126953125,
|
||
|
|
"completions/max_length": 853.8,
|
||
|
|
"completions/max_terminated_length": 853.8,
|
||
|
|
"completions/mean_length": 218.22265625,
|
||
|
|
"completions/mean_terminated_length": 218.5016082763672,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 104.0,
|
||
|
|
"epoch": 0.88,
|
||
|
|
"grad_norm": 0.0008610005606897175,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0013,
|
||
|
|
"num_tokens": 936276930.0,
|
||
|
|
"reward": 0.9432915210723877,
|
||
|
|
"reward_std": 0.07173903733491897,
|
||
|
|
"rewards/accuracy_reward": 0.51357421875,
|
||
|
|
"rewards/brier_reward": 0.7973132848739624,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9522138953208923,
|
||
|
|
"rewards/format_reward": 0.99873046875,
|
||
|
|
"rewards/frontier_coverage_0": 0.1482342377305031,
|
||
|
|
"rewards/frontier_coverage_1": 0.1482342377305031,
|
||
|
|
"rewards/frontier_coverage_10": 0.13063293248414992,
|
||
|
|
"rewards/frontier_coverage_15": 0.11058640480041504,
|
||
|
|
"rewards/frontier_coverage_20": 0.09042486101388932,
|
||
|
|
"rewards/frontier_coverage_25": 0.07815308347344399,
|
||
|
|
"rewards/frontier_coverage_5": 0.1459364965558052,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.095428466796875,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.12478266805410385,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477142333984375,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0477142333984375,
|
||
|
|
"signal/advantage_abs_mean": 0.05423672944307327,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.05423672944307327,
|
||
|
|
"signal/advantage_pre_scale_std": 0.10489667057991028,
|
||
|
|
"signal/advantage_std": 0.10489667057991028,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.11198111921548844,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.14444852769374847,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011198111996054649,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011198111996054649,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022622523456811906,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03173264637589455,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022622523829340935,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022622523829340935,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.002435302734375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.006508936267346143,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012176513671875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012176513671875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1513482302427292,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1945643663406372,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021642797160893678,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021642797160893678,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1513482302427292,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1945643663406372,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021642797160893678,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021642797160893678,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13286824375391007,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1707235634326935,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019000159576535226,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019000159576535226,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10618945211172104,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13699231892824174,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015185092808678747,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015185092808678747,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08019336313009262,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1035075157880783,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011467650765553117,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011467650765553117,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06250079050660133,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08046629726886749,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008937613223679364,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008937613223679364,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14934307038784028,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.191866672039032,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002135605877265334,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002135605877265334,
|
||
|
|
"step": 275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.36265407533836524,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9281281036058401,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9562500495415044,
|
||
|
|
"calibration/confidence_entropy": 0.4386896076321184,
|
||
|
|
"calibration/coverage@0%": 0.015291047926019725,
|
||
|
|
"calibration/coverage@1%": 0.015291047926019725,
|
||
|
|
"calibration/coverage@10%": 0.05053767782798051,
|
||
|
|
"calibration/coverage@15%": 0.12166972439660798,
|
||
|
|
"calibration/coverage@20%": 0.1737108850389471,
|
||
|
|
"calibration/coverage@25%": 0.23867640463719733,
|
||
|
|
"calibration/coverage@30%": 0.3657895808152028,
|
||
|
|
"calibration/coverage@5%": 0.04192401361229423,
|
||
|
|
"calibration/ece": 0.1668563098098826,
|
||
|
|
"calibration/mean_confidence": 0.5572778727670613,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00048828125,
|
||
|
|
"completions/max_length": 771.2,
|
||
|
|
"completions/max_terminated_length": 771.2,
|
||
|
|
"completions/mean_length": 219.6869140625,
|
||
|
|
"completions/mean_terminated_length": 219.7939697265625,
|
||
|
|
"completions/min_length": 44.0,
|
||
|
|
"completions/min_terminated_length": 103.8,
|
||
|
|
"epoch": 0.896,
|
||
|
|
"grad_norm": 0.000931259011849761,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0002,
|
||
|
|
"num_tokens": 953637372.0,
|
||
|
|
"reward": 0.9620537042617798,
|
||
|
|
"reward_std": 0.062380281090736386,
|
||
|
|
"rewards/accuracy_reward": 0.55,
|
||
|
|
"rewards/brier_reward": 0.8034387230873108,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.953917121887207,
|
||
|
|
"rewards/format_reward": 0.99951171875,
|
||
|
|
"rewards/frontier_coverage_0": 0.12949443906545638,
|
||
|
|
"rewards/frontier_coverage_1": 0.12949443906545638,
|
||
|
|
"rewards/frontier_coverage_10": 0.12721864879131317,
|
||
|
|
"rewards/frontier_coverage_15": 0.11173846274614334,
|
||
|
|
"rewards/frontier_coverage_20": 0.09186044484376907,
|
||
|
|
"rewards/frontier_coverage_25": 0.09052028059959412,
|
||
|
|
"rewards/frontier_coverage_5": 0.12822107076644898,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083984375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.11366891264915466,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0419921875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0419921875,
|
||
|
|
"signal/advantage_abs_mean": 0.045716925710439685,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.045716925710439685,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09269649535417557,
|
||
|
|
"signal/advantage_std": 0.09269649535417557,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.10150657594203949,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13047962486743928,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010150657780468465,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010150657780468465,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020040722191333772,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02639743983745575,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0020040722563862802,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020040722563862802,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13849151730537415,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1763758659362793,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019804287469014524,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019804287469014524,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13849151730537415,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1763758659362793,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019804287469014524,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019804287469014524,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12867191731929778,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16390889883041382,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018400083761662244,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018400083761662244,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10785606354475022,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1374804839491844,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015423417557030917,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015423417557030917,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08022382259368896,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10267434567213059,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011472006561234593,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011472006561234593,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06498619243502617,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08332770913839341,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009293025592342019,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009293025592342019,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1374574899673462,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17511171400547026,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019656420452520253,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019656420452520253,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.37522512880027037,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9442782389923409,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.947399954454208,
|
||
|
|
"calibration/confidence_entropy": 0.4650686166519636,
|
||
|
|
"calibration/coverage@0%": 0.016413129892367907,
|
||
|
|
"calibration/coverage@1%": 0.016413129892367907,
|
||
|
|
"calibration/coverage@10%": 0.07937443248049807,
|
||
|
|
"calibration/coverage@15%": 0.10954009480048402,
|
||
|
|
"calibration/coverage@20%": 0.2910374784209907,
|
||
|
|
"calibration/coverage@25%": 0.36547853654511975,
|
||
|
|
"calibration/coverage@30%": 0.4070216998710808,
|
||
|
|
"calibration/coverage@5%": 0.04242551894562234,
|
||
|
|
"calibration/ece": 0.1624069213865474,
|
||
|
|
"calibration/mean_confidence": 0.5441825835729568,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00087890625,
|
||
|
|
"completions/max_length": 791.0,
|
||
|
|
"completions/max_terminated_length": 791.0,
|
||
|
|
"completions/mean_length": 218.5203125,
|
||
|
|
"completions/mean_terminated_length": 218.70968627929688,
|
||
|
|
"completions/min_length": 46.0,
|
||
|
|
"completions/min_terminated_length": 108.0,
|
||
|
|
"epoch": 0.912,
|
||
|
|
"grad_norm": 0.0007959533832035959,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0005,
|
||
|
|
"num_tokens": 970926316.0,
|
||
|
|
"reward": 0.9589624524116516,
|
||
|
|
"reward_std": 0.06368861570954323,
|
||
|
|
"rewards/accuracy_reward": 0.54443359375,
|
||
|
|
"rewards/brier_reward": 0.8047209978103638,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9570178985595703,
|
||
|
|
"rewards/format_reward": 0.99912109375,
|
||
|
|
"rewards/frontier_coverage_0": 0.1231890469789505,
|
||
|
|
"rewards/frontier_coverage_1": 0.1231890469789505,
|
||
|
|
"rewards/frontier_coverage_10": 0.11918876320123672,
|
||
|
|
"rewards/frontier_coverage_15": 0.11012653410434722,
|
||
|
|
"rewards/frontier_coverage_20": 0.08735538721084594,
|
||
|
|
"rewards/frontier_coverage_25": 0.0844956398010254,
|
||
|
|
"rewards/frontier_coverage_5": 0.12247090861201286,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.077764892578125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.10732522755861282,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0388824462890625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0388824462890625,
|
||
|
|
"signal/advantage_abs_mean": 0.046620288491249086,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.046620288491249086,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09408236593008042,
|
||
|
|
"signal/advantage_std": 0.09408236593008042,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.10646310597658157,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13785125315189362,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010646310821175575,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010646310821175575,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018950655311346053,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.024912358820438386,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001895065582357347,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001895065582357347,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001580810546875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0033625274430960418,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007904052734375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007904052734375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1370942160487175,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1766595095396042,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019604472909122705,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019604472909122705,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1370942160487175,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1766595095396042,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019604472909122705,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019604472909122705,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13189242035150528,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16979371905326843,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018860616255551577,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018860616255551577,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11495250314474106,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14815734326839447,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016438208287581802,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016438208287581802,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0817980095744133,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10573563128709793,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001169711514376104,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001169711514376104,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06745465323328972,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0874060109257698,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009646015590988099,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009646015590988099,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13618865460157395,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17541859149932862,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019474976696074009,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019474976696074009,
|
||
|
|
"step": 285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.4086159994676217,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9488851070088791,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9612514103049214,
|
||
|
|
"calibration/confidence_entropy": 0.46317632801857994,
|
||
|
|
"calibration/coverage@0%": 0.0019546538649706457,
|
||
|
|
"calibration/coverage@1%": 0.0019546538649706457,
|
||
|
|
"calibration/coverage@10%": 0.0019546538649706457,
|
||
|
|
"calibration/coverage@15%": 0.0019546538649706457,
|
||
|
|
"calibration/coverage@20%": 0.030470278864970647,
|
||
|
|
"calibration/coverage@25%": 0.1752140410958904,
|
||
|
|
"calibration/coverage@30%": 0.23304718077299413,
|
||
|
|
"calibration/coverage@5%": 0.0019546538649706457,
|
||
|
|
"calibration/ece": 0.16270723826949696,
|
||
|
|
"calibration/mean_confidence": 0.5144905505616594,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00068359375,
|
||
|
|
"completions/max_length": 669.2,
|
||
|
|
"completions/max_terminated_length": 669.2,
|
||
|
|
"completions/mean_length": 212.41162109375,
|
||
|
|
"completions/mean_terminated_length": 212.55691833496093,
|
||
|
|
"completions/min_length": 22.6,
|
||
|
|
"completions/min_terminated_length": 102.4,
|
||
|
|
"epoch": 0.928,
|
||
|
|
"grad_norm": 0.0006845975876785815,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0006,
|
||
|
|
"num_tokens": 988128227.0,
|
||
|
|
"reward": 0.9499905347824097,
|
||
|
|
"reward_std": 0.0637421689927578,
|
||
|
|
"rewards/accuracy_reward": 0.53134765625,
|
||
|
|
"rewards/brier_reward": 0.7886561274528503,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9511130928993226,
|
||
|
|
"rewards/format_reward": 0.99921875,
|
||
|
|
"rewards/frontier_coverage_0": 0.1233413815498352,
|
||
|
|
"rewards/frontier_coverage_1": 0.1233413815498352,
|
||
|
|
"rewards/frontier_coverage_10": 0.11664480417966842,
|
||
|
|
"rewards/frontier_coverage_15": 0.10323808938264847,
|
||
|
|
"rewards/frontier_coverage_20": 0.0839030459523201,
|
||
|
|
"rewards/frontier_coverage_25": 0.07939638644456863,
|
||
|
|
"rewards/frontier_coverage_5": 0.12051307708024979,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076837158203125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1088681623339653,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0384185791015625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0384185791015625,
|
||
|
|
"signal/advantage_abs_mean": 0.0450009323656559,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0450009323656559,
|
||
|
|
"signal/advantage_pre_scale_std": 0.0930885449051857,
|
||
|
|
"signal/advantage_std": 0.0930885449051857,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.10286559611558914,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13540334701538087,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010286559909582138,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010286559909582138,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022042611613869667,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02982432134449482,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022042611613869666,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022042611613869666,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13377356976270677,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17625623643398286,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019129620399326087,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019129620399326087,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13377356976270677,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17625623643398286,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019129620399326087,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019129620399326087,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12501855790615082,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16456068456172943,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017877653473988176,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017877653473988176,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10385439693927764,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1367882251739502,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001485117874108255,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001485117874108255,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07855610102415085,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10324958562850953,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011233522789552807,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011233522789552807,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06524143964052201,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08521927446126938,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009329525521025062,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009329525521025062,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13079718947410585,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1722914159297943,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018703997833654285,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018703997833654285,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.23838562616966347,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9505648955577994,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9718941061132969,
|
||
|
|
"calibration/confidence_entropy": 0.46715676252481525,
|
||
|
|
"calibration/coverage@0%": 0.04140625,
|
||
|
|
"calibration/coverage@1%": 0.04140625,
|
||
|
|
"calibration/coverage@10%": 0.241015625,
|
||
|
|
"calibration/coverage@15%": 0.339453125,
|
||
|
|
"calibration/coverage@20%": 0.458984375,
|
||
|
|
"calibration/coverage@25%": 0.552734375,
|
||
|
|
"calibration/coverage@30%": 0.63515625,
|
||
|
|
"calibration/coverage@5%": 0.137109375,
|
||
|
|
"calibration/ece": 0.09940677972037605,
|
||
|
|
"calibration/mean_confidence": 0.531384959032291,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00048828125,
|
||
|
|
"completions/max_length": 771.4,
|
||
|
|
"completions/max_terminated_length": 771.4,
|
||
|
|
"completions/mean_length": 212.0873046875,
|
||
|
|
"completions/mean_terminated_length": 212.19146118164062,
|
||
|
|
"completions/min_length": 42.6,
|
||
|
|
"completions/min_terminated_length": 104.6,
|
||
|
|
"epoch": 0.944,
|
||
|
|
"grad_norm": 0.0008118631085380912,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0001,
|
||
|
|
"num_tokens": 1005275425.0,
|
||
|
|
"reward": 0.9531220674514771,
|
||
|
|
"reward_std": 0.06911338046193123,
|
||
|
|
"rewards/accuracy_reward": 0.5326171875,
|
||
|
|
"rewards/brier_reward": 0.7961865067481995,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9509644865989685,
|
||
|
|
"rewards/format_reward": 0.99951171875,
|
||
|
|
"rewards/frontier_coverage_0": 0.13934872150421143,
|
||
|
|
"rewards/frontier_coverage_1": 0.13934872150421143,
|
||
|
|
"rewards/frontier_coverage_10": 0.13322099447250366,
|
||
|
|
"rewards/frontier_coverage_15": 0.12536731064319612,
|
||
|
|
"rewards/frontier_coverage_20": 0.10790151357650757,
|
||
|
|
"rewards/frontier_coverage_25": 0.07890170142054558,
|
||
|
|
"rewards/frontier_coverage_5": 0.13902259171009063,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09739990234375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.1297285944223404,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048699951171875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.048699951171875,
|
||
|
|
"signal/advantage_abs_mean": 0.051316916942596436,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.051316916942596436,
|
||
|
|
"signal/advantage_pre_scale_std": 0.1007079765200615,
|
||
|
|
"signal/advantage_std": 0.1007079765200615,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.10415657460689545,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.1357142448425293,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010415657423436642,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010415657423436642,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02197747528553009,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028798850253224373,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002197747630998492,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002197747630998492,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14225318431854247,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18444684743881226,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002034220518544316,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002034220518544316,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14225318431854247,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18444684743881226,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002034220518544316,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002034220518544316,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13472193479537964,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1746243953704834,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019265236100181938,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019265236100181938,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12378777861595154,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16058792769908906,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001770165259949863,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001770165259949863,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10432201772928237,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13533593565225602,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014918048167601229,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014918048167601229,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06784244105219842,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08792800456285477,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009701469331048429,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009701469331048429,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14184999465942383,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18391945362091064,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020284549333155154,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020284549333155154,
|
||
|
|
"step": 295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.341505781144742,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9222308232707069,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9632253904748493,
|
||
|
|
"calibration/confidence_entropy": 0.4160752706670393,
|
||
|
|
"calibration/coverage@0%": 0.0046875,
|
||
|
|
"calibration/coverage@1%": 0.0046875,
|
||
|
|
"calibration/coverage@10%": 0.06015625,
|
||
|
|
"calibration/coverage@15%": 0.2171875,
|
||
|
|
"calibration/coverage@20%": 0.29375,
|
||
|
|
"calibration/coverage@25%": 0.33515625,
|
||
|
|
"calibration/coverage@30%": 0.380859375,
|
||
|
|
"calibration/coverage@5%": 0.026953125,
|
||
|
|
"calibration/ece": 0.16675665989188038,
|
||
|
|
"calibration/mean_confidence": 0.5437287107428197,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0009765625,
|
||
|
|
"completions/max_length": 787.6,
|
||
|
|
"completions/max_terminated_length": 787.6,
|
||
|
|
"completions/mean_length": 212.77939453125,
|
||
|
|
"completions/mean_terminated_length": 212.98646240234376,
|
||
|
|
"completions/min_length": 21.4,
|
||
|
|
"completions/min_terminated_length": 101.0,
|
||
|
|
"epoch": 0.96,
|
||
|
|
"grad_norm": 0.000677246309351176,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0008,
|
||
|
|
"num_tokens": 1022394606.0,
|
||
|
|
"reward": 0.9520639538764953,
|
||
|
|
"reward_std": 0.05611773431301117,
|
||
|
|
"rewards/accuracy_reward": 0.52470703125,
|
||
|
|
"rewards/brier_reward": 0.8068634748458863,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9482045531272888,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.163878333568573,
|
||
|
|
"rewards/frontier_coverage_1": 0.163878333568573,
|
||
|
|
"rewards/frontier_coverage_10": 0.15686869621276855,
|
||
|
|
"rewards/frontier_coverage_15": 0.14912986606359482,
|
||
|
|
"rewards/frontier_coverage_20": 0.1306234270334244,
|
||
|
|
"rewards/frontier_coverage_25": 0.10091332048177719,
|
||
|
|
"rewards/frontier_coverage_5": 0.162114617228508,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.071954345703125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.09751666337251663,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0359771728515625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0359771728515625,
|
||
|
|
"signal/advantage_abs_mean": 0.04099631011486053,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.04099631011486053,
|
||
|
|
"signal/advantage_pre_scale_std": 0.08593605160713196,
|
||
|
|
"signal/advantage_std": 0.08593605160713196,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.09962098300457001,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13012734651565552,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00996209867298603,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00996209867298603,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02344542071223259,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03061012886464596,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023445420898497104,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023445420898497104,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00177001953125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.003914954606443644,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000885009765625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000885009765625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1334471195936203,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17524456679821016,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019082937389612198,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019082937389612198,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1334471195936203,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17524456679821016,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019082937389612198,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019082937389612198,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1283472567796707,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16842811405658722,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018353657331317663,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018353657331317663,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1224765032529831,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16059996783733368,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001751414081081748,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001751414081081748,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10159540325403213,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13351670205593108,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014528142288327216,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014528142288327216,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07035883218050003,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09163796603679657,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010061312816105783,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010061312816105783,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13208072930574416,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17346138060092925,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018887544283643365,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018887544283643365,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.96,
|
||
|
|
"eval_calibration/aurc": 0.4217691397090048,
|
||
|
|
"eval_calibration/batch_distribution_entropy": 0.8610313854852217,
|
||
|
|
"eval_calibration/buffer_distribution_entropy": 0.9560461371941351,
|
||
|
|
"eval_calibration/confidence_entropy": 0.434464526008027,
|
||
|
|
"eval_calibration/coverage@0%": 0.0859375,
|
||
|
|
"eval_calibration/coverage@1%": 0.0859375,
|
||
|
|
"eval_calibration/coverage@10%": 0.0859375,
|
||
|
|
"eval_calibration/coverage@15%": 0.0859375,
|
||
|
|
"eval_calibration/coverage@20%": 0.171875,
|
||
|
|
"eval_calibration/coverage@25%": 0.1953125,
|
||
|
|
"eval_calibration/coverage@30%": 0.3828125,
|
||
|
|
"eval_calibration/coverage@5%": 0.0859375,
|
||
|
|
"eval_calibration/ece": 0.15024806293485138,
|
||
|
|
"eval_calibration/mean_confidence": 0.4473162753004427,
|
||
|
|
"eval_completions/clipped_ratio": 0.0,
|
||
|
|
"eval_completions/max_length": 586.75,
|
||
|
|
"eval_completions/max_terminated_length": 586.75,
|
||
|
|
"eval_completions/mean_length": 216.80401229858398,
|
||
|
|
"eval_completions/mean_terminated_length": 216.80401229858398,
|
||
|
|
"eval_completions/min_length": 123.0,
|
||
|
|
"eval_completions/min_terminated_length": 123.0,
|
||
|
|
"eval_loss": 0.0,
|
||
|
|
"eval_num_tokens": 1022394606.0,
|
||
|
|
"eval_reward": 0.9061934798955917,
|
||
|
|
"eval_reward_std": 0.2254936397075653,
|
||
|
|
"eval_rewards/accuracy_reward": 0.423828125,
|
||
|
|
"eval_rewards/brier_reward": 0.8254795223474503,
|
||
|
|
"eval_rewards/confidence_uniqueness_reward": 0.892333984375,
|
||
|
|
"eval_rewards/format_reward": 1.0,
|
||
|
|
"eval_rewards/frontier_coverage_0": 0.2570565640926361,
|
||
|
|
"eval_rewards/frontier_coverage_1": 0.2570565640926361,
|
||
|
|
"eval_rewards/frontier_coverage_10": 0.2536317780613899,
|
||
|
|
"eval_rewards/frontier_coverage_15": 0.24184846132993698,
|
||
|
|
"eval_rewards/frontier_coverage_20": 0.19645333662629128,
|
||
|
|
"eval_rewards/frontier_coverage_25": 0.11186533235013485,
|
||
|
|
"eval_rewards/frontier_coverage_5": 0.25538118183612823,
|
||
|
|
"eval_runtime": 26.4029,
|
||
|
|
"eval_samples_per_second": 18.937,
|
||
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4708251953125,
|
||
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4925679340958595,
|
||
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23541259765625,
|
||
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
||
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23541259765625,
|
||
|
|
"eval_signal/advantage_abs_mean": 0.21049191057682037,
|
||
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21049191057682037,
|
||
|
|
"eval_signal/advantage_pre_scale_std": 0.22300851345062256,
|
||
|
|
"eval_signal/advantage_std": 0.22300851345062256,
|
||
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.18114928901195526,
|
||
|
|
"eval_signal/brier_reward/group_std_mean": 0.23810456693172455,
|
||
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01811492955312133,
|
||
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01811492955312133,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0526275634765625,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06362566910684109,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005262756545562297,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005262756545562297,
|
||
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
||
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/format_reward/weight": 0.5,
|
||
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.39571524411439896,
|
||
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4655715748667717,
|
||
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005658728186972439,
|
||
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005658728186972439,
|
||
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.39571524411439896,
|
||
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4655715748667717,
|
||
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005658728186972439,
|
||
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005658728186972439,
|
||
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.38869649171829224,
|
||
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.45751991868019104,
|
||
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0055583600187674165,
|
||
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0055583600187674165,
|
||
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.37286993116140366,
|
||
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4395202621817589,
|
||
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005332039901986718,
|
||
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005332039901986718,
|
||
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2987586036324501,
|
||
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.35560180246829987,
|
||
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004272247897461057,
|
||
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004272247897461057,
|
||
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.15963854268193245,
|
||
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.19564306735992432,
|
||
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00228283106116578,
|
||
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00228283106116578,
|
||
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.39209768921136856,
|
||
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.46143699437379837,
|
||
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00560699705965817,
|
||
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00560699705965817,
|
||
|
|
"eval_steps_per_second": 0.151,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.26373361579535665,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9421094436183927,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9528965422674689,
|
||
|
|
"calibration/confidence_entropy": 0.4349062223781308,
|
||
|
|
"calibration/coverage@0%": 0.038671875,
|
||
|
|
"calibration/coverage@1%": 0.038671875,
|
||
|
|
"calibration/coverage@10%": 0.225390625,
|
||
|
|
"calibration/coverage@15%": 0.34921875,
|
||
|
|
"calibration/coverage@20%": 0.4703125,
|
||
|
|
"calibration/coverage@25%": 0.51953125,
|
||
|
|
"calibration/coverage@30%": 0.6046875,
|
||
|
|
"calibration/coverage@5%": 0.12890625,
|
||
|
|
"calibration/ece": 0.17938745999152064,
|
||
|
|
"calibration/mean_confidence": 0.47612366714428206,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.00078125,
|
||
|
|
"completions/max_length": 1079.0,
|
||
|
|
"completions/max_terminated_length": 1079.0,
|
||
|
|
"completions/mean_length": 218.6193359375,
|
||
|
|
"completions/mean_terminated_length": 218.78925476074218,
|
||
|
|
"completions/min_length": 20.2,
|
||
|
|
"completions/min_terminated_length": 105.4,
|
||
|
|
"epoch": 0.976,
|
||
|
|
"grad_norm": 0.0011953199282288551,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": -0.0004,
|
||
|
|
"num_tokens": 1039494388.0,
|
||
|
|
"reward": 0.9646060228347778,
|
||
|
|
"reward_std": 0.06057727336883545,
|
||
|
|
"rewards/accuracy_reward": 0.5572265625,
|
||
|
|
"rewards/brier_reward": 0.7965489983558655,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9446879029273987,
|
||
|
|
"rewards/format_reward": 0.99921875,
|
||
|
|
"rewards/frontier_coverage_0": 0.13621663516387345,
|
||
|
|
"rewards/frontier_coverage_1": 0.13621663516387345,
|
||
|
|
"rewards/frontier_coverage_10": 0.13521524909883736,
|
||
|
|
"rewards/frontier_coverage_15": 0.123319979198277,
|
||
|
|
"rewards/frontier_coverage_20": 0.10419662147760392,
|
||
|
|
"rewards/frontier_coverage_25": 0.0865128442645073,
|
||
|
|
"rewards/frontier_coverage_5": 0.13564201332628728,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08521728515625,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.11445611119270324,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042608642578125,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042608642578125,
|
||
|
|
"signal/advantage_abs_mean": 0.0442943774163723,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.0442943774163723,
|
||
|
|
"signal/advantage_pre_scale_std": 0.08979521989822388,
|
||
|
|
"signal/advantage_std": 0.08979521989822388,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.10064524412155151,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13214921504259108,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010064524598419666,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010064524598419666,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025105124711990355,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03264738321304321,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00251051252707839,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00251051252707839,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00147705078125,
|
||
|
|
"signal/format_reward/group_std_mean": 0.003687587613239884,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000738525390625,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000738525390625,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15215515047311784,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19931451976299286,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00217581856995821,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00217581856995821,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15215515047311784,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19931451976299286,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00217581856995821,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00217581856995821,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14945854544639586,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19589938819408417,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021372571820393203,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021372571820393203,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13901536613702775,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18246963918209075,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001987919630482793,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001987919630482793,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09175378829240799,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12028330266475677,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001312079164199531,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001312079164199531,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06841867417097092,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0888764038681984,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009783869958482684,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009783869958482684,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15160029977560044,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19860751628875734,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021678843069821594,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021678843069821594,
|
||
|
|
"step": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.3631079336430432,
|
||
|
|
"calibration/batch_distribution_entropy": 0.8903107972897141,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9423624956881109,
|
||
|
|
"calibration/confidence_entropy": 0.38868585708804126,
|
||
|
|
"calibration/coverage@0%": 0.009375,
|
||
|
|
"calibration/coverage@1%": 0.009375,
|
||
|
|
"calibration/coverage@10%": 0.06881803449119374,
|
||
|
|
"calibration/coverage@15%": 0.0981240826810176,
|
||
|
|
"calibration/coverage@20%": 0.24775562622309194,
|
||
|
|
"calibration/coverage@25%": 0.2883920927103718,
|
||
|
|
"calibration/coverage@30%": 0.42278238136007823,
|
||
|
|
"calibration/coverage@5%": 0.034804611056751464,
|
||
|
|
"calibration/ece": 0.16911921169999553,
|
||
|
|
"calibration/mean_confidence": 0.4151368878784917,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0001953125,
|
||
|
|
"completions/max_length": 615.4,
|
||
|
|
"completions/max_terminated_length": 615.4,
|
||
|
|
"completions/mean_length": 218.641796875,
|
||
|
|
"completions/mean_terminated_length": 218.68403015136718,
|
||
|
|
"completions/min_length": 62.8,
|
||
|
|
"completions/min_terminated_length": 105.8,
|
||
|
|
"epoch": 0.992,
|
||
|
|
"grad_norm": 0.0007313250098377466,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0003,
|
||
|
|
"num_tokens": 1056861760.0,
|
||
|
|
"reward": 0.9498269557952881,
|
||
|
|
"reward_std": 0.05522818714380264,
|
||
|
|
"rewards/accuracy_reward": 0.52666015625,
|
||
|
|
"rewards/brier_reward": 0.7941903471946716,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9336981177330017,
|
||
|
|
"rewards/format_reward": 0.9998046875,
|
||
|
|
"rewards/frontier_coverage_0": 0.16115660667419435,
|
||
|
|
"rewards/frontier_coverage_1": 0.16115660667419435,
|
||
|
|
"rewards/frontier_coverage_10": 0.1574849307537079,
|
||
|
|
"rewards/frontier_coverage_15": 0.14156938940286637,
|
||
|
|
"rewards/frontier_coverage_20": 0.09960801899433136,
|
||
|
|
"rewards/frontier_coverage_25": 0.08336942940950394,
|
||
|
|
"rewards/frontier_coverage_5": 0.16108837127685546,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076116943359375,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.10370298027992249,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0380584716796875,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0380584716796875,
|
||
|
|
"signal/advantage_abs_mean": 0.04109043329954147,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.04109043329954147,
|
||
|
|
"signal/advantage_pre_scale_std": 0.0837163046002388,
|
||
|
|
"signal/advantage_std": 0.0837163046002388,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.10567554533481598,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13649180233478547,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010567554831504821,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010567554831504821,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02967868894338608,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.038240250945091245,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029678690247237683,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029678690247237683,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
||
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15005522966384888,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19505343437194825,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002145789796486497,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002145789796486497,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15005522966384888,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19505343437194825,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002145789796486497,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002145789796486497,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1445154994726181,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18783792853355408,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020665716379880904,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020665716379880904,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12960256338119508,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16830175668001174,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018533166265115141,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018533166265115141,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09365049600601197,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12195496857166291,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013392021879553794,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013392021879553794,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.059622716158628464,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0775347501039505,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008526048739440739,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008526048739440739,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14962632954120636,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19450531899929047,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021396565716713667,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021396565716713667,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"calibration/aurc": 0.28396650781922805,
|
||
|
|
"calibration/batch_distribution_entropy": 0.9298624640673321,
|
||
|
|
"calibration/buffer_distribution_entropy": 0.9416951559070315,
|
||
|
|
"calibration/confidence_entropy": 0.42543707865259917,
|
||
|
|
"calibration/coverage@0%": 0.0,
|
||
|
|
"calibration/coverage@1%": 0.0,
|
||
|
|
"calibration/coverage@10%": 0.10546875,
|
||
|
|
"calibration/coverage@15%": 0.1123046875,
|
||
|
|
"calibration/coverage@20%": 0.177734375,
|
||
|
|
"calibration/coverage@25%": 0.52734375,
|
||
|
|
"calibration/coverage@30%": 0.640625,
|
||
|
|
"calibration/coverage@5%": 0.0,
|
||
|
|
"calibration/ece": 0.17469015186957876,
|
||
|
|
"calibration/mean_confidence": 0.5968461396947611,
|
||
|
|
"clip_ratio/high_max": 0.0,
|
||
|
|
"clip_ratio/high_mean": 0.0,
|
||
|
|
"clip_ratio/low_mean": 0.0,
|
||
|
|
"clip_ratio/low_min": 0.0,
|
||
|
|
"clip_ratio/region_mean": 0.0,
|
||
|
|
"completions/clipped_ratio": 0.0011260363520408379,
|
||
|
|
"completions/max_length": 547.0,
|
||
|
|
"completions/max_terminated_length": 547.0,
|
||
|
|
"completions/mean_length": 219.1380844116211,
|
||
|
|
"completions/mean_terminated_length": 219.38528442382812,
|
||
|
|
"completions/min_length": 0.0,
|
||
|
|
"completions/min_terminated_length": 107.0,
|
||
|
|
"epoch": 0.9984,
|
||
|
|
"num_tokens": 1063761617.0,
|
||
|
|
"reward": 0.9532146751880646,
|
||
|
|
"reward_std": 0.0626951027661562,
|
||
|
|
"rewards/accuracy_reward": 0.5458984375,
|
||
|
|
"rewards/brier_reward": 0.7678532898426056,
|
||
|
|
"rewards/confidence_uniqueness_reward": 0.9525276124477386,
|
||
|
|
"rewards/format_reward": 0.9990234375,
|
||
|
|
"rewards/frontier_coverage_0": 0.10089538991451263,
|
||
|
|
"rewards/frontier_coverage_1": 0.10089538991451263,
|
||
|
|
"rewards/frontier_coverage_10": 0.0899675115942955,
|
||
|
|
"rewards/frontier_coverage_15": 0.07815195806324482,
|
||
|
|
"rewards/frontier_coverage_20": 0.07496267184615135,
|
||
|
|
"rewards/frontier_coverage_25": 0.0691562332212925,
|
||
|
|
"rewards/frontier_coverage_5": 0.09545820578932762,
|
||
|
|
"signal/accuracy_reward/centered_abs_mean": 0.078643798828125,
|
||
|
|
"signal/accuracy_reward/group_std_mean": 0.10699028894305229,
|
||
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6796875,
|
||
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0393218994140625,
|
||
|
|
"signal/accuracy_reward/weight": 0.5,
|
||
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0393218994140625,
|
||
|
|
"signal/advantage_abs_mean": 0.04600895382463932,
|
||
|
|
"signal/advantage_pre_scale_abs_mean": 0.04600895382463932,
|
||
|
|
"signal/advantage_pre_scale_std": 0.09656216576695442,
|
||
|
|
"signal/advantage_std": 0.09656216576695442,
|
||
|
|
"signal/brier_reward/centered_abs_mean": 0.1068677231669426,
|
||
|
|
"signal/brier_reward/group_std_mean": 0.13664086163043976,
|
||
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010686772409826517,
|
||
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010686772409826517,
|
||
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020281177014112473,
|
||
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0280290599912405,
|
||
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
||
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0020281175966374576,
|
||
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
||
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020281175966374576,
|
||
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
||
|
|
"signal/format_reward/group_std_mean": 0.005524271633476019,
|
||
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
||
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/format_reward/weight": 0.5,
|
||
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
||
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13520820438861847,
|
||
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17727045714855194,
|
||
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019334774115122855,
|
||
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019334774115122855,
|
||
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13520820438861847,
|
||
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17727045714855194,
|
||
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019334774115122855,
|
||
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019334774115122855,
|
||
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12330496311187744,
|
||
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16166674345731735,
|
||
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017632609815336764,
|
||
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017632609815336764,
|
||
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10612555965781212,
|
||
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13906469196081161,
|
||
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015175954904407263,
|
||
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015175954904407263,
|
||
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08115751668810844,
|
||
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10668066889047623,
|
||
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001160552492365241,
|
||
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001160552492365241,
|
||
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05299645476043224,
|
||
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06990226730704308,
|
||
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007578493095934391,
|
||
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007578493095934391,
|
||
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13076359033584595,
|
||
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1714838668704033,
|
||
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
||
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018699192441999912,
|
||
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
||
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018699192441999912,
|
||
|
|
"step": 312,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": -0.0002521653109280846,
|
||
|
|
"train_runtime": 61370.3515,
|
||
|
|
"train_samples_per_second": 0.326,
|
||
|
|
"train_steps_per_second": 0.005
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 312,
|
||
|
|
"num_input_tokens_seen": 1063761617,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 60,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 8,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|