Model: hector-gr/RLCR-v4-ks-uniqueness-hotpot-aliases-qwen35-balanced Source: Original Platform
9286 lines
572 KiB
JSON
9286 lines
572 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.5847039164885837,
|
|
"calibration/batch_distribution_entropy": 0.6584268879399373,
|
|
"calibration/confidence_entropy": 0.34608582603933763,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.45991905567284935,
|
|
"calibration/mean_confidence": 0.7899003636588005,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03740234375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1504.2,
|
|
"completions/mean_length": 272.08525390625,
|
|
"completions/mean_terminated_length": 222.96121215820312,
|
|
"completions/min_length": 1.8,
|
|
"completions/min_terminated_length": 1.8,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.13381105661392212,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0938,
|
|
"num_tokens": 17630185.0,
|
|
"reward": 0.6633403062820434,
|
|
"reward_std": 0.5005818009376526,
|
|
"rewards/accuracy_reward": 0.26064453125,
|
|
"rewards/brier_reward": 0.40211123824119566,
|
|
"rewards/confidence_uniqueness_reward": 0.4817495226860046,
|
|
"rewards/format_reward": 0.6783203125,
|
|
"rewards/frontier_aurc_reward": 0.3331014633178711,
|
|
"rewards/frontier_coverage_1": 0.3331014633178711,
|
|
"rewards/frontier_coverage_10": 0.3331014633178711,
|
|
"rewards/frontier_coverage_15": 0.3331014633178711,
|
|
"rewards/frontier_coverage_20": 0.3331014633178711,
|
|
"rewards/frontier_coverage_25": 0.3331014633178711,
|
|
"rewards/frontier_coverage_5": 0.3331014633178711,
|
|
"rewards/frontier_ece_reward": 0.3331014633178711,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.272442626953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.31336791515350343,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.265625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1362213134765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1362213134765625,
|
|
"signal/advantage_abs_mean": 0.4301945328712463,
|
|
"signal/advantage_pre_scale_abs_mean": 0.4301945328712463,
|
|
"signal/advantage_pre_scale_std": 0.5090484619140625,
|
|
"signal/advantage_std": 0.5090484619140625,
|
|
"signal/brier_reward/centered_abs_mean": 0.3353294968605042,
|
|
"signal/brier_reward/group_std_mean": 0.3795027434825897,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04191618710756302,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.04191618710756302,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.29859185218811035,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.34845991134643556,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.037323981523513794,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.037323981523513794,
|
|
"signal/format_reward/centered_abs_mean": 0.4048095703125,
|
|
"signal/format_reward/group_std_mean": 0.4541024386882782,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.20240478515625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.20240478515625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.31411888003349303,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36313520073890687,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.31411888003349303,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.36313520073890687,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.31411888003349303,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36313520073890687,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.31411888003349303,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36313520073890687,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.31411888003349303,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36313520073890687,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.31411888003349303,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36313520073890687,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.31411888003349303,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.36313520073890687,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005622727982699871,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.31411888003349303,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36313520073890687,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03926486000418663,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03926486000418663,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6457139978886784,
|
|
"calibration/batch_distribution_entropy": 0.6441986464829549,
|
|
"calibration/confidence_entropy": 0.34328724550232764,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5055546442235969,
|
|
"calibration/mean_confidence": 0.794898848653782,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03447265625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1509.8,
|
|
"completions/mean_length": 257.67353515625,
|
|
"completions/mean_terminated_length": 212.0354034423828,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.0325239896774292,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0951,
|
|
"num_tokens": 35369114.0,
|
|
"reward": 0.6889099597930908,
|
|
"reward_std": 0.4666505217552185,
|
|
"rewards/accuracy_reward": 0.2486328125,
|
|
"rewards/brier_reward": 0.41154505014419557,
|
|
"rewards/confidence_uniqueness_reward": 0.5230929255485535,
|
|
"rewards/format_reward": 0.72841796875,
|
|
"rewards/frontier_aurc_reward": 0.3338188171386719,
|
|
"rewards/frontier_coverage_1": 0.3338188171386719,
|
|
"rewards/frontier_coverage_10": 0.3338188171386719,
|
|
"rewards/frontier_coverage_15": 0.3338188171386719,
|
|
"rewards/frontier_coverage_20": 0.3338188171386719,
|
|
"rewards/frontier_coverage_25": 0.3338188171386719,
|
|
"rewards/frontier_coverage_5": 0.3338188171386719,
|
|
"rewards/frontier_ece_reward": 0.3338188171386719,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.246484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.2941208004951477,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.278125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1232421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1232421875,
|
|
"signal/advantage_abs_mean": 0.38684009909629824,
|
|
"signal/advantage_pre_scale_abs_mean": 0.38684009909629824,
|
|
"signal/advantage_pre_scale_std": 0.47523062229156493,
|
|
"signal/advantage_std": 0.47523062229156493,
|
|
"signal/brier_reward/centered_abs_mean": 0.31702865958213805,
|
|
"signal/brier_reward/group_std_mean": 0.3654074013233185,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.039628582447767256,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.039628582447767256,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.27955763339996337,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.33765636682510375,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03494470417499542,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03494470417499542,
|
|
"signal/format_reward/centered_abs_mean": 0.367266845703125,
|
|
"signal/format_reward/group_std_mean": 0.4314119637012482,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1836334228515625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1836334228515625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2960649013519287,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3490014672279358,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2960649013519287,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3490014672279358,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2960649013519287,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3490014672279358,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2960649013519287,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3490014672279358,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2960649013519287,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3490014672279358,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2960649013519287,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3490014672279358,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2960649013519287,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3490014672279358,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005299561750143766,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2960649013519287,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3490014672279358,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03700811266899109,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03700811266899109,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.512972896770905,
|
|
"calibration/batch_distribution_entropy": 0.6455565397380528,
|
|
"calibration/buffer_distribution_entropy": 0.663353331603825,
|
|
"calibration/confidence_entropy": 0.34031164024812754,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.1304147465437788,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3992726308201509,
|
|
"calibration/mean_confidence": 0.7967083523664463,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01474609375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1469.2,
|
|
"completions/mean_length": 195.08359375,
|
|
"completions/mean_terminated_length": 175.09444580078124,
|
|
"completions/min_length": 15.0,
|
|
"completions/min_terminated_length": 15.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.014204099774360657,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0581,
|
|
"num_tokens": 52415506.0,
|
|
"reward": 0.8369853854179382,
|
|
"reward_std": 0.3447115957736969,
|
|
"rewards/accuracy_reward": 0.32392578125,
|
|
"rewards/brier_reward": 0.5263184905052185,
|
|
"rewards/confidence_uniqueness_reward": 0.6511122345924377,
|
|
"rewards/format_reward": 0.8958984375,
|
|
"rewards/frontier_aurc_reward": 0.31629036981612446,
|
|
"rewards/frontier_coverage_1": 0.3337858706712723,
|
|
"rewards/frontier_coverage_10": 0.3337858706712723,
|
|
"rewards/frontier_coverage_15": 0.3337858706712723,
|
|
"rewards/frontier_coverage_20": 0.3337858706712723,
|
|
"rewards/frontier_coverage_25": 0.3337858706712723,
|
|
"rewards/frontier_coverage_5": 0.3337858706712723,
|
|
"rewards/frontier_ece_reward": 0.30707414969801905,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.215557861328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2651854813098907,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.321875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1077789306640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1077789306640625,
|
|
"signal/advantage_abs_mean": 0.2599746108055115,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2599746108055115,
|
|
"signal/advantage_pre_scale_std": 0.3583178609609604,
|
|
"signal/advantage_std": 0.3583178609609604,
|
|
"signal/brier_reward/centered_abs_mean": 0.27192609906196596,
|
|
"signal/brier_reward/group_std_mean": 0.32940946221351625,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033990762382745746,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.033990762382745746,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19261950254440308,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2545264959335327,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024077437818050385,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.024077437818050385,
|
|
"signal/format_reward/centered_abs_mean": 0.17540283203125,
|
|
"signal/format_reward/group_std_mean": 0.27413243651390073,
|
|
"signal/format_reward/group_zero_std_frac": 0.109375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.087701416015625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.087701416015625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.21203429326415063,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.25519408844411373,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003795413678744808,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003795413678744808,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2363867074251175,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2943639099597931,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2363867074251175,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2943639099597931,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2363867074251175,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2943639099597931,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2363867074251175,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2943639099597931,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2363867074251175,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2943639099597931,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2363867074251175,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2943639099597931,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0042313218116760256,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.23511168360710144,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.2841564893722534,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02938896045088768,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02938896045088768,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4888797539741285,
|
|
"calibration/batch_distribution_entropy": 0.6813886801759734,
|
|
"calibration/buffer_distribution_entropy": 0.6586534364395229,
|
|
"calibration/confidence_entropy": 0.37032147254666903,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3500033465127651,
|
|
"calibration/mean_confidence": 0.7916001832678258,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00244140625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1051.0,
|
|
"completions/mean_length": 139.14345703125,
|
|
"completions/mean_terminated_length": 135.73062591552736,
|
|
"completions/min_length": 24.2,
|
|
"completions/min_terminated_length": 24.2,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.0029658779967576265,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0072,
|
|
"num_tokens": 68758735.0,
|
|
"reward": 0.8559809684753418,
|
|
"reward_std": 0.21398123800754548,
|
|
"rewards/accuracy_reward": 0.39052734375,
|
|
"rewards/brier_reward": 0.5993266940116883,
|
|
"rewards/confidence_uniqueness_reward": 0.7415877938270569,
|
|
"rewards/format_reward": 0.98271484375,
|
|
"rewards/frontier_aurc_reward": -0.006243877112865448,
|
|
"rewards/frontier_coverage_1": 0.051288098096847534,
|
|
"rewards/frontier_coverage_10": 0.051288098096847534,
|
|
"rewards/frontier_coverage_15": 0.051288098096847534,
|
|
"rewards/frontier_coverage_20": 0.051288098096847534,
|
|
"rewards/frontier_coverage_25": 0.051288098096847534,
|
|
"rewards/frontier_coverage_5": 0.051288098096847534,
|
|
"rewards/frontier_ece_reward": -0.029207990132272245,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.215142822265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2683002293109894,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.303125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1075714111328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1075714111328125,
|
|
"signal/advantage_abs_mean": 0.16140898168087006,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16140898168087006,
|
|
"signal/advantage_pre_scale_std": 0.23029825389385222,
|
|
"signal/advantage_std": 0.23029825389385222,
|
|
"signal/brier_reward/centered_abs_mean": 0.2396304726600647,
|
|
"signal/brier_reward/group_std_mean": 0.2977387011051178,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02995380908250809,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02995380908250809,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12114065438508988,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1570127099752426,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015142581798136235,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015142581798136235,
|
|
"signal/format_reward/centered_abs_mean": 0.032794189453125,
|
|
"signal/format_reward/group_std_mean": 0.08168496713042259,
|
|
"signal/format_reward/group_zero_std_frac": 0.590625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0163970947265625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0163970947265625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0057399141602218155,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.008327136002480983,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001027444624924101,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001027444624924101,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09880194365978241,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15808388888835906,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09880194365978241,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15808388888835906,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09880194365978241,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15808388888835906,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09880194365978241,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15808388888835906,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09880194365978241,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15808388888835906,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09880194365978241,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15808388888835906,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001768554700538516,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.10608904957771301,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.13063843846321105,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013261131197214126,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013261131197214126,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5833397552619999,
|
|
"calibration/batch_distribution_entropy": 0.7711499566613911,
|
|
"calibration/buffer_distribution_entropy": 0.6807855611919311,
|
|
"calibration/confidence_entropy": 0.4415601518335488,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.39055978154292853,
|
|
"calibration/mean_confidence": 0.7369087639275899,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1312.4,
|
|
"completions/max_terminated_length": 663.8,
|
|
"completions/mean_length": 118.03896484375,
|
|
"completions/mean_terminated_length": 117.34614868164063,
|
|
"completions/min_length": 36.4,
|
|
"completions/min_terminated_length": 36.4,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.004985923878848553,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 84900606.0,
|
|
"reward": 0.8875869035720825,
|
|
"reward_std": 0.1687161237001419,
|
|
"rewards/accuracy_reward": 0.41005859375,
|
|
"rewards/brier_reward": 0.6432444810867309,
|
|
"rewards/confidence_uniqueness_reward": 0.8054814100265503,
|
|
"rewards/format_reward": 0.99697265625,
|
|
"rewards/frontier_aurc_reward": -0.005297265853732825,
|
|
"rewards/frontier_coverage_1": 0.05199873372912407,
|
|
"rewards/frontier_coverage_10": 0.05199873372912407,
|
|
"rewards/frontier_coverage_15": 0.05199873372912407,
|
|
"rewards/frontier_coverage_20": 0.05199873372912407,
|
|
"rewards/frontier_coverage_25": 0.05199873372912407,
|
|
"rewards/frontier_coverage_5": 0.05199873372912407,
|
|
"rewards/frontier_ece_reward": -0.02007437888532877,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.188824462890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.239495387673378,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0944122314453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0944122314453125,
|
|
"signal/advantage_abs_mean": 0.13153642565011978,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13153642565011978,
|
|
"signal/advantage_pre_scale_std": 0.19022858738899232,
|
|
"signal/advantage_std": 0.19022858738899232,
|
|
"signal/brier_reward/centered_abs_mean": 0.20990893840789795,
|
|
"signal/brier_reward/group_std_mean": 0.26221993565559387,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026238617300987244,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.026238617300987244,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07374545335769653,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09642878472805023,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009218181669712066,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009218181669712066,
|
|
"signal/format_reward/centered_abs_mean": 0.005865478515625,
|
|
"signal/format_reward/group_std_mean": 0.017125242576003074,
|
|
"signal/format_reward/group_zero_std_frac": 0.903125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0029327392578125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0029327392578125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036071423441171647,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005228751804679632,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.456784321926535e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.456784321926535e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1151951402425766,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1752179741859436,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1151951402425766,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1752179741859436,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1151951402425766,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1752179741859436,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1151951402425766,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1752179741859436,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1151951402425766,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1752179741859436,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1151951402425766,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1752179741859436,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020619928138330577,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.09093453586101533,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.11268945634365082,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011366816982626916,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011366816982626916,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5097055641069405,
|
|
"calibration/batch_distribution_entropy": 0.8475244813528956,
|
|
"calibration/buffer_distribution_entropy": 0.7181974411858837,
|
|
"calibration/confidence_entropy": 0.5181217659331916,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.2519970886498832,
|
|
"calibration/mean_confidence": 0.6561152287939566,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1300.0,
|
|
"completions/max_terminated_length": 686.4,
|
|
"completions/mean_length": 118.8400390625,
|
|
"completions/mean_terminated_length": 118.28626098632813,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 39.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.0021375820506364107,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 101162136.0,
|
|
"reward": 0.9285231471061707,
|
|
"reward_std": 0.15589244663715363,
|
|
"rewards/accuracy_reward": 0.46591796875,
|
|
"rewards/brier_reward": 0.7028721451759339,
|
|
"rewards/confidence_uniqueness_reward": 0.8286531448364258,
|
|
"rewards/format_reward": 0.99765625,
|
|
"rewards/frontier_aurc_reward": -0.004147473024204373,
|
|
"rewards/frontier_coverage_1": 0.0462727814912796,
|
|
"rewards/frontier_coverage_10": 0.0462727814912796,
|
|
"rewards/frontier_coverage_15": 0.0462727814912796,
|
|
"rewards/frontier_coverage_20": 0.0462727814912796,
|
|
"rewards/frontier_coverage_25": 0.0462727814912796,
|
|
"rewards/frontier_coverage_5": 0.0462727814912796,
|
|
"rewards/frontier_ece_reward": 0.0031993848038837313,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.178704833984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.23412654399871827,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0893524169921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0893524169921875,
|
|
"signal/advantage_abs_mean": 0.1198556289076805,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1198556289076805,
|
|
"signal/advantage_pre_scale_std": 0.1744433581829071,
|
|
"signal/advantage_std": 0.1744433581829071,
|
|
"signal/brier_reward/centered_abs_mean": 0.1862693428993225,
|
|
"signal/brier_reward/group_std_mean": 0.23494411408901214,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023283667862415314,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023283667862415314,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07551151067018509,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09503946453332901,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009438938833773136,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009438938833773136,
|
|
"signal/format_reward/centered_abs_mean": 0.0045166015625,
|
|
"signal/format_reward/group_std_mean": 0.012585635110735894,
|
|
"signal/format_reward/group_zero_std_frac": 0.93125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00225830078125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00225830078125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00249544708058238,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003836318291723728,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4668500049738216e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4668500049738216e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14569330513477324,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2063480108976364,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14569330513477324,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2063480108976364,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14569330513477324,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2063480108976364,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14569330513477324,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2063480108976364,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14569330513477324,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2063480108976364,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14569330513477324,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2063480108976364,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026079101487994196,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07989477664232254,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09803950935602188,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009986847080290318,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009986847080290318,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4034232041511837,
|
|
"calibration/batch_distribution_entropy": 0.8755003538996025,
|
|
"calibration/buffer_distribution_entropy": 0.7633529821095395,
|
|
"calibration/confidence_entropy": 0.5421376521803977,
|
|
"calibration/coverage@0%": 0.0015625,
|
|
"calibration/coverage@1%": 0.0015625,
|
|
"calibration/coverage@10%": 0.0015625,
|
|
"calibration/coverage@15%": 0.0015625,
|
|
"calibration/coverage@20%": 0.0015625,
|
|
"calibration/coverage@25%": 0.013701711849123211,
|
|
"calibration/coverage@30%": 0.09620627146406507,
|
|
"calibration/coverage@5%": 0.0015625,
|
|
"calibration/ece": 0.1530395509526774,
|
|
"calibration/mean_confidence": 0.6019227630383341,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1087.2,
|
|
"completions/max_terminated_length": 611.8,
|
|
"completions/mean_length": 127.91103515625,
|
|
"completions/mean_terminated_length": 127.49808654785156,
|
|
"completions/min_length": 45.4,
|
|
"completions/min_terminated_length": 45.4,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.0021089757792651653,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 117581417.0,
|
|
"reward": 0.9377779960632324,
|
|
"reward_std": 0.14431948363780975,
|
|
"rewards/accuracy_reward": 0.46689453125,
|
|
"rewards/brier_reward": 0.7333363890647888,
|
|
"rewards/confidence_uniqueness_reward": 0.8407991886138916,
|
|
"rewards/format_reward": 0.9982421875,
|
|
"rewards/frontier_aurc_reward": -0.003713348833844066,
|
|
"rewards/frontier_coverage_1": 0.06892486587166786,
|
|
"rewards/frontier_coverage_10": 0.06892486587166786,
|
|
"rewards/frontier_coverage_15": 0.06892486587166786,
|
|
"rewards/frontier_coverage_20": 0.06892486587166786,
|
|
"rewards/frontier_coverage_25": 0.06892486587166786,
|
|
"rewards/frontier_coverage_5": 0.06892486587166786,
|
|
"rewards/frontier_ece_reward": 0.008853092475328594,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.174737548828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.22908719778060913,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0873687744140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0873687744140625,
|
|
"signal/advantage_abs_mean": 0.11145349889993668,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11145349889993668,
|
|
"signal/advantage_pre_scale_std": 0.16115358769893645,
|
|
"signal/advantage_std": 0.16115358769893645,
|
|
"signal/brier_reward/centered_abs_mean": 0.17493112981319428,
|
|
"signal/brier_reward/group_std_mean": 0.22111110389232635,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021866391226649285,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021866391226649285,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07850788980722427,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09901682883501053,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009813486225903034,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009813486225903034,
|
|
"signal/format_reward/centered_abs_mean": 0.00340576171875,
|
|
"signal/format_reward/group_std_mean": 0.009943688940256833,
|
|
"signal/format_reward/group_zero_std_frac": 0.94375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001702880859375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020899008959531784,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003328893566504121,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.740922475117259e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.740922475117259e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18038916885852813,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24032150208950043,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18038916885852813,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24032150208950043,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18038916885852813,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24032150208950043,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18038916885852813,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24032150208950043,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18038916885852813,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24032150208950043,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18038916885852813,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24032150208950043,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032289660535752772,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06583447903394699,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08168520033359528,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008229309879243373,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008229309879243373,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4206024418243185,
|
|
"calibration/batch_distribution_entropy": 0.8995750737642869,
|
|
"calibration/buffer_distribution_entropy": 0.8043121453818675,
|
|
"calibration/confidence_entropy": 0.569409384218363,
|
|
"calibration/coverage@0%": 0.0015655577299412914,
|
|
"calibration/coverage@1%": 0.0015655577299412914,
|
|
"calibration/coverage@10%": 0.0015655577299412914,
|
|
"calibration/coverage@15%": 0.021112860812133073,
|
|
"calibration/coverage@20%": 0.048890808463796474,
|
|
"calibration/coverage@25%": 0.05787518346379648,
|
|
"calibration/coverage@30%": 0.09778085249510762,
|
|
"calibration/coverage@5%": 0.0015655577299412914,
|
|
"calibration/ece": 0.12978587211412798,
|
|
"calibration/mean_confidence": 0.519045968630486,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 865.2,
|
|
"completions/max_terminated_length": 428.0,
|
|
"completions/mean_length": 143.0123046875,
|
|
"completions/mean_terminated_length": 142.74053344726562,
|
|
"completions/min_length": 53.0,
|
|
"completions/min_terminated_length": 53.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.0014828367857262492,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 133962535.0,
|
|
"reward": 0.9425314426422119,
|
|
"reward_std": 0.12577675879001618,
|
|
"rewards/accuracy_reward": 0.46201171875,
|
|
"rewards/brier_reward": 0.7536422848701477,
|
|
"rewards/confidence_uniqueness_reward": 0.858219563961029,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.0033836792223155498,
|
|
"rewards/frontier_coverage_1": 0.0849621519446373,
|
|
"rewards/frontier_coverage_10": 0.0849621519446373,
|
|
"rewards/frontier_coverage_15": 0.0849621519446373,
|
|
"rewards/frontier_coverage_20": 0.0849621519446373,
|
|
"rewards/frontier_coverage_25": 0.0849621519446373,
|
|
"rewards/frontier_coverage_5": 0.0849621519446373,
|
|
"rewards/frontier_ece_reward": 0.01173410825431347,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.153033447265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.20268645882606506,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.421875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0765167236328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0765167236328125,
|
|
"signal/advantage_abs_mean": 0.09732886403799057,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09732886403799057,
|
|
"signal/advantage_pre_scale_std": 0.14202140867710114,
|
|
"signal/advantage_std": 0.14202140867710114,
|
|
"signal/brier_reward/centered_abs_mean": 0.16542658805847169,
|
|
"signal/brier_reward/group_std_mean": 0.20726902186870574,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02067832350730896,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02067832350730896,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06928935050964355,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0870005339384079,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008661168813705444,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008661168813705444,
|
|
"signal/format_reward/centered_abs_mean": 0.0018798828125,
|
|
"signal/format_reward/group_std_mean": 0.005187963135540485,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001608482375741005,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025744295679032804,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8791834483854473e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8791834483854473e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20140134692192077,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2594201147556305,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20140134692192077,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2594201147556305,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20140134692192077,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2594201147556305,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20140134692192077,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2594201147556305,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20140134692192077,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2594201147556305,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20140134692192077,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2594201147556305,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036050839349627494,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05098764970898628,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06506675034761429,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006373456213623285,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006373456213623285,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25230664071765474,
|
|
"calibration/batch_distribution_entropy": 0.9275190996526831,
|
|
"calibration/buffer_distribution_entropy": 0.8422015589527397,
|
|
"calibration/confidence_entropy": 0.5486066737565748,
|
|
"calibration/coverage@0%": 0.006254586594911937,
|
|
"calibration/coverage@1%": 0.006254586594911937,
|
|
"calibration/coverage@10%": 0.18946076932485323,
|
|
"calibration/coverage@15%": 0.2558914811643836,
|
|
"calibration/coverage@20%": 0.35945985200587083,
|
|
"calibration/coverage@25%": 0.5741682974559686,
|
|
"calibration/coverage@30%": 0.6563600782778864,
|
|
"calibration/coverage@5%": 0.03164521159491194,
|
|
"calibration/ece": 0.1880487292583986,
|
|
"calibration/mean_confidence": 0.4893802243864155,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 631.6,
|
|
"completions/max_terminated_length": 453.2,
|
|
"completions/mean_length": 151.640625,
|
|
"completions/mean_terminated_length": 151.50575561523436,
|
|
"completions/min_length": 57.0,
|
|
"completions/min_terminated_length": 57.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.0015879464335739613,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 150465767.0,
|
|
"reward": 0.995142936706543,
|
|
"reward_std": 0.11998683214187622,
|
|
"rewards/accuracy_reward": 0.56689453125,
|
|
"rewards/brier_reward": 0.7644174814224243,
|
|
"rewards/confidence_uniqueness_reward": 0.8742515563964843,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.002688299072906375,
|
|
"rewards/frontier_coverage_1": 0.04131883792579174,
|
|
"rewards/frontier_coverage_10": 0.04131883792579174,
|
|
"rewards/frontier_coverage_15": 0.04131883792579174,
|
|
"rewards/frontier_coverage_20": 0.04131883792579174,
|
|
"rewards/frontier_coverage_25": 0.04131883792579174,
|
|
"rewards/frontier_coverage_5": 0.04131883792579174,
|
|
"rewards/frontier_ece_reward": 0.022904913872480392,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.157574462890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.20815051794052125,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.403125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0787872314453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0787872314453125,
|
|
"signal/advantage_abs_mean": 0.09277628511190414,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09277628511190414,
|
|
"signal/advantage_pre_scale_std": 0.13499897867441177,
|
|
"signal/advantage_std": 0.13499897867441177,
|
|
"signal/brier_reward/centered_abs_mean": 0.16650620400905608,
|
|
"signal/brier_reward/group_std_mean": 0.2074718177318573,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02081327550113201,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02081327550113201,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05890683159232139,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07246142774820327,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007363353949040174,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007363353949040174,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014498829375952482,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022392344195395707,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5952903524739667e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5952903524739667e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2221655696630478,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28164334297180177,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2221655696630478,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28164334297180177,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2221655696630478,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28164334297180177,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2221655696630478,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28164334297180177,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2221655696630478,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28164334297180177,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2221655696630478,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28164334297180177,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003976763784885406,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.042747367173433304,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05639359876513481,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005343420896679163,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005343420896679163,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33645536213345384,
|
|
"calibration/batch_distribution_entropy": 0.9596539888214259,
|
|
"calibration/buffer_distribution_entropy": 0.8735012467520804,
|
|
"calibration/confidence_entropy": 0.5221218785246823,
|
|
"calibration/coverage@0%": 0.0011741682974559687,
|
|
"calibration/coverage@1%": 0.0011741682974559687,
|
|
"calibration/coverage@10%": 0.016408543297455967,
|
|
"calibration/coverage@15%": 0.0781716303816047,
|
|
"calibration/coverage@20%": 0.14271725171232877,
|
|
"calibration/coverage@25%": 0.28699471012720157,
|
|
"calibration/coverage@30%": 0.4003959760273973,
|
|
"calibration/coverage@5%": 0.0011741682974559687,
|
|
"calibration/ece": 0.09525141433601002,
|
|
"calibration/mean_confidence": 0.4579662869305697,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 726.2,
|
|
"completions/max_terminated_length": 499.4,
|
|
"completions/mean_length": 159.56552734375,
|
|
"completions/mean_terminated_length": 159.43108520507812,
|
|
"completions/min_length": 66.6,
|
|
"completions/min_terminated_length": 66.6,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0011594152310863137,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 167120646.0,
|
|
"reward": 0.9787898898124695,
|
|
"reward_std": 0.11392782926559449,
|
|
"rewards/accuracy_reward": 0.5169921875,
|
|
"rewards/brier_reward": 0.7787827849388123,
|
|
"rewards/confidence_uniqueness_reward": 0.8814345479011536,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.002690990408882499,
|
|
"rewards/frontier_coverage_1": 0.09653475433588028,
|
|
"rewards/frontier_coverage_10": 0.09653475433588028,
|
|
"rewards/frontier_coverage_15": 0.09653475433588028,
|
|
"rewards/frontier_coverage_20": 0.09653475433588028,
|
|
"rewards/frontier_coverage_25": 0.09653475433588028,
|
|
"rewards/frontier_coverage_5": 0.09653475433588028,
|
|
"rewards/frontier_ece_reward": 0.021138164028525353,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14764404296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.19262417852878572,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.073822021484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.073822021484375,
|
|
"signal/advantage_abs_mean": 0.08894423246383668,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08894423246383668,
|
|
"signal/advantage_pre_scale_std": 0.13065763264894487,
|
|
"signal/advantage_std": 0.13065763264894487,
|
|
"signal/brier_reward/centered_abs_mean": 0.16479850709438323,
|
|
"signal/brier_reward/group_std_mean": 0.20878478586673738,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020599813386797904,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020599813386797904,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05358843132853508,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06951197981834412,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006698553916066885,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006698553916066885,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016239010030403734,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026413511484861376,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9067828654660845e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9067828654660845e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2256328582763672,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2876005291938782,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2256328582763672,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2876005291938782,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2256328582763672,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2876005291938782,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2256328582763672,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2876005291938782,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2256328582763672,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2876005291938782,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2256328582763672,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2876005291938782,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004038827959448099,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03931342288851738,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05260321199893951,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004914177861064672,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004914177861064672,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.6288164644200097,
|
|
"eval_calibration/batch_distribution_entropy": 0.8989468045052211,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8895391515718143,
|
|
"eval_calibration/confidence_entropy": 0.4647170323267861,
|
|
"eval_calibration/coverage@0%": 0.0078125,
|
|
"eval_calibration/coverage@1%": 0.0078125,
|
|
"eval_calibration/coverage@10%": 0.0078125,
|
|
"eval_calibration/coverage@15%": 0.0078125,
|
|
"eval_calibration/coverage@20%": 0.0078125,
|
|
"eval_calibration/coverage@25%": 0.0078125,
|
|
"eval_calibration/coverage@30%": 0.0078125,
|
|
"eval_calibration/coverage@5%": 0.0078125,
|
|
"eval_calibration/ece": 0.25625,
|
|
"eval_calibration/mean_confidence": 0.47421874999999997,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 320.25,
|
|
"eval_completions/max_terminated_length": 320.25,
|
|
"eval_completions/mean_length": 162.92638778686523,
|
|
"eval_completions/mean_terminated_length": 162.92638778686523,
|
|
"eval_completions/min_length": 84.0,
|
|
"eval_completions/min_terminated_length": 84.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 167120646.0,
|
|
"eval_reward": 0.8989088386297226,
|
|
"eval_reward_std": 0.22175507247447968,
|
|
"eval_rewards/accuracy_reward": 0.36328125,
|
|
"eval_rewards/brier_reward": 0.7475792616605759,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8271484375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.004181863856501877,
|
|
"eval_rewards/frontier_coverage_1": 0.1855441853404045,
|
|
"eval_rewards/frontier_coverage_10": 0.1855441853404045,
|
|
"eval_rewards/frontier_coverage_15": 0.1855441853404045,
|
|
"eval_rewards/frontier_coverage_20": 0.1855441853404045,
|
|
"eval_rewards/frontier_coverage_25": 0.1855441853404045,
|
|
"eval_rewards/frontier_coverage_5": 0.1855441853404045,
|
|
"eval_rewards/frontier_ece_reward": 0.004597170656779781,
|
|
"eval_runtime": 18.591,
|
|
"eval_samples_per_second": 26.895,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.448486328125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.48031486570835114,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2242431640625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2242431640625,
|
|
"eval_signal/advantage_abs_mean": 0.19596171379089355,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19596171379089355,
|
|
"eval_signal/advantage_pre_scale_std": 0.21975785121321678,
|
|
"eval_signal/advantage_std": 0.21975785121321678,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22779354453086853,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2757691219449043,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028474193066358566,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.028474193066358566,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0811309814453125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10011672414839268,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010141372680664062,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010141372680664062,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033902853610925376,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0057787023251876235,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.068610764486948e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.068610764486948e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3594288006424904,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4431358575820923,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3594288006424904,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4431358575820923,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3594288006424904,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4431358575820923,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3594288006424904,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4431358575820923,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3594288006424904,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.4431358575820923,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3594288006424904,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4431358575820923,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00643377541564405,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.04951605014503002,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.07548648118972778,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006189506268128753,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006189506268128753,
|
|
"eval_steps_per_second": 0.215,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"step": 50,
|
|
"train_probe_calibration/aurc": 0.3141581882630031,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.936476757190635,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.8907348399167692,
|
|
"train_probe_calibration/confidence_entropy": 0.49250123077527586,
|
|
"train_probe_calibration/coverage@0%": 0.0859375,
|
|
"train_probe_calibration/coverage@1%": 0.0859375,
|
|
"train_probe_calibration/coverage@10%": 0.140625,
|
|
"train_probe_calibration/coverage@15%": 0.1640625,
|
|
"train_probe_calibration/coverage@20%": 0.2421875,
|
|
"train_probe_calibration/coverage@25%": 0.3671875,
|
|
"train_probe_calibration/coverage@30%": 0.4921875,
|
|
"train_probe_calibration/coverage@5%": 0.0859375,
|
|
"train_probe_calibration/ece": 0.1928515625,
|
|
"train_probe_calibration/mean_confidence": 0.4678515625,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 302.0,
|
|
"train_probe_completions/max_terminated_length": 302.0,
|
|
"train_probe_completions/mean_length": 165.1945037841797,
|
|
"train_probe_completions/mean_terminated_length": 165.1945037841797,
|
|
"train_probe_completions/min_length": 85.75,
|
|
"train_probe_completions/min_terminated_length": 85.75,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 167120646.0,
|
|
"train_probe_reward": 0.981845498085022,
|
|
"train_probe_reward_std": 0.225032739341259,
|
|
"train_probe_rewards/accuracy_reward": 0.533203125,
|
|
"train_probe_rewards/brier_reward": 0.7775600701570511,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.83984375,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0024623668286949396,
|
|
"train_probe_rewards/frontier_coverage_1": 0.0937139643356204,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0937139643356204,
|
|
"train_probe_rewards/frontier_coverage_15": 0.0937139643356204,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0937139643356204,
|
|
"train_probe_rewards/frontier_coverage_25": 0.0937139643356204,
|
|
"train_probe_rewards/frontier_coverage_5": 0.0937139643356204,
|
|
"train_probe_rewards/frontier_ece_reward": 0.02438117517158389,
|
|
"train_probe_runtime": 16.9521,
|
|
"train_probe_samples_per_second": 29.495,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4891357421875,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.5024252682924271,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.24456787109375,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.24456787109375,
|
|
"train_probe_signal/advantage_abs_mean": 0.20956310257315636,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20956310257315636,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.22249595075845718,
|
|
"train_probe_signal/advantage_std": 0.22249595075845718,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.1993359997868538,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.24190250411629677,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024916999973356724,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.024916999973356724,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.072021484375,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.08701512962579727,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009002685546875,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009002685546875,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002210920094512403,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0033829217427410185,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.957547050958965e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.957547050958965e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3611508533358574,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.45599839091300964,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3611508533358574,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.45599839091300964,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3611508533358574,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.45599839091300964,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3611508533358574,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.45599839091300964,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3611508533358574,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.45599839091300964,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3611508533358574,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.45599839091300964,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0064645998645573854,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.05064787529408932,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.07190386392176151,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006330984411761165,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006330984411761165,
|
|
"train_probe_steps_per_second": 0.236
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34964791555898866,
|
|
"calibration/batch_distribution_entropy": 0.9696213125270348,
|
|
"calibration/buffer_distribution_entropy": 0.8983124294925157,
|
|
"calibration/confidence_entropy": 0.4958941192671529,
|
|
"calibration/coverage@0%": 0.00234375,
|
|
"calibration/coverage@1%": 0.00234375,
|
|
"calibration/coverage@10%": 0.00234375,
|
|
"calibration/coverage@15%": 0.00509424115913556,
|
|
"calibration/coverage@20%": 0.05213040275049117,
|
|
"calibration/coverage@25%": 0.08225227161100197,
|
|
"calibration/coverage@30%": 0.37194278518708257,
|
|
"calibration/coverage@5%": 0.00234375,
|
|
"calibration/ece": 0.17160420712101454,
|
|
"calibration/mean_confidence": 0.5104296144058368,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 876.0,
|
|
"completions/max_terminated_length": 429.8,
|
|
"completions/mean_length": 167.087109375,
|
|
"completions/mean_terminated_length": 166.55306396484374,
|
|
"completions/min_length": 75.0,
|
|
"completions/min_terminated_length": 75.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.0011842965614050627,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0021,
|
|
"num_tokens": 184068738.0,
|
|
"reward": 0.9703314185142518,
|
|
"reward_std": 0.11359598636627197,
|
|
"rewards/accuracy_reward": 0.50283203125,
|
|
"rewards/brier_reward": 0.7624746680259704,
|
|
"rewards/confidence_uniqueness_reward": 0.8847023010253906,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.0029593195766210558,
|
|
"rewards/frontier_coverage_1": 0.1041076198220253,
|
|
"rewards/frontier_coverage_10": 0.1041076198220253,
|
|
"rewards/frontier_coverage_15": 0.1041076198220253,
|
|
"rewards/frontier_coverage_20": 0.1041076198220253,
|
|
"rewards/frontier_coverage_25": 0.1041076198220253,
|
|
"rewards/frontier_coverage_5": 0.1041076198220253,
|
|
"rewards/frontier_ece_reward": 0.018636261485517024,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.140679931640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.18439924120903015,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.478125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0703399658203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0703399658203125,
|
|
"signal/advantage_abs_mean": 0.08731478452682495,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08731478452682495,
|
|
"signal/advantage_pre_scale_std": 0.13049161434173584,
|
|
"signal/advantage_std": 0.13049161434173584,
|
|
"signal/brier_reward/centered_abs_mean": 0.17366381585597992,
|
|
"signal/brier_reward/group_std_mean": 0.21920994222164153,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02170797698199749,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02170797698199749,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05222712978720665,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06577225774526596,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006528391223400831,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006528391223400831,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_std_mean": 0.004971844423562288,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002111483830958605,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003272761357948184,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.779556063818745e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.779556063818745e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2239651769399643,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28462483286857604,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2239651769399643,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28462483286857604,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2239651769399643,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28462483286857604,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2239651769399643,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28462483286857604,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2239651769399643,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28462483286857604,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2239651769399643,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28462483286857604,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004008976416662336,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04017831683158875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.052979382872581485,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050222896039485935,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050222896039485935,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33092391595466186,
|
|
"calibration/batch_distribution_entropy": 0.9636589659990976,
|
|
"calibration/buffer_distribution_entropy": 0.9104053730857296,
|
|
"calibration/confidence_entropy": 0.4565744740313494,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.033203125,
|
|
"calibration/coverage@15%": 0.07109375,
|
|
"calibration/coverage@20%": 0.123828125,
|
|
"calibration/coverage@25%": 0.259375,
|
|
"calibration/coverage@30%": 0.41015625,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.10457382812500002,
|
|
"calibration/mean_confidence": 0.5450074218750001,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1111.6,
|
|
"completions/max_terminated_length": 581.6,
|
|
"completions/mean_length": 171.0466796875,
|
|
"completions/mean_terminated_length": 170.64740295410155,
|
|
"completions/min_length": 73.4,
|
|
"completions/min_terminated_length": 73.4,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.0011184883769601583,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 200635072.0,
|
|
"reward": 0.9835161685943603,
|
|
"reward_std": 0.11471217423677445,
|
|
"rewards/accuracy_reward": 0.5234375,
|
|
"rewards/brier_reward": 0.7757836103439331,
|
|
"rewards/confidence_uniqueness_reward": 0.8818888425827026,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0028618790674954653,
|
|
"rewards/frontier_coverage_1": 0.10896083116531372,
|
|
"rewards/frontier_coverage_10": 0.10896083116531372,
|
|
"rewards/frontier_coverage_15": 0.10896083116531372,
|
|
"rewards/frontier_coverage_20": 0.10896083116531372,
|
|
"rewards/frontier_coverage_25": 0.10896083116531372,
|
|
"rewards/frontier_coverage_5": 0.10896083116531372,
|
|
"rewards/frontier_ece_reward": 0.02505997121334076,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.144921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.18714555501937866,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.478125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0724609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0724609375,
|
|
"signal/advantage_abs_mean": 0.08908778131008148,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08908778131008148,
|
|
"signal/advantage_pre_scale_std": 0.1341209128499031,
|
|
"signal/advantage_std": 0.1341209128499031,
|
|
"signal/brier_reward/centered_abs_mean": 0.17730557322502136,
|
|
"signal/brier_reward/group_std_mean": 0.22363218367099763,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02216319665312767,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02216319665312767,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.059557638317346576,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07315693497657776,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007444704789668322,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007444704789668322,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025338933803141115,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003893780894577503,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.535668922471814e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.535668922471814e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21418379843235016,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27754629850387574,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21418379843235016,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27754629850387574,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21418379843235016,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27754629850387574,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21418379843235016,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27754629850387574,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21418379843235016,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27754629850387574,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21418379843235016,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27754629850387574,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038338899612426758,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0428839735686779,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.055157840996980664,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005360496696084738,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005360496696084738,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25000286826356066,
|
|
"calibration/batch_distribution_entropy": 0.9349390762939768,
|
|
"calibration/buffer_distribution_entropy": 0.9173764134934357,
|
|
"calibration/confidence_entropy": 0.4289778485805737,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.172265625,
|
|
"calibration/coverage@15%": 0.25703125,
|
|
"calibration/coverage@20%": 0.334765625,
|
|
"calibration/coverage@25%": 0.585546875,
|
|
"calibration/coverage@30%": 0.728125,
|
|
"calibration/coverage@5%": 0.0078125,
|
|
"calibration/ece": 0.12878515624999998,
|
|
"calibration/mean_confidence": 0.5667140625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 1127.2,
|
|
"completions/max_terminated_length": 594.2,
|
|
"completions/mean_length": 174.773046875,
|
|
"completions/mean_terminated_length": 173.9766632080078,
|
|
"completions/min_length": 77.6,
|
|
"completions/min_terminated_length": 77.6,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.001059314119629562,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0023,
|
|
"num_tokens": 217456972.0,
|
|
"reward": 1.0045670747756958,
|
|
"reward_std": 0.11658241301774978,
|
|
"rewards/accuracy_reward": 0.567578125,
|
|
"rewards/brier_reward": 0.7832911968231201,
|
|
"rewards/confidence_uniqueness_reward": 0.878244411945343,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.002497353684157133,
|
|
"rewards/frontier_coverage_1": 0.0910948745906353,
|
|
"rewards/frontier_coverage_10": 0.0910948745906353,
|
|
"rewards/frontier_coverage_15": 0.0910948745906353,
|
|
"rewards/frontier_coverage_20": 0.0910948745906353,
|
|
"rewards/frontier_coverage_25": 0.0910948745906353,
|
|
"rewards/frontier_coverage_5": 0.0910948745906353,
|
|
"rewards/frontier_ece_reward": 0.029902569949626923,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14205322265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.18839455246925355,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.459375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071026611328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.071026611328125,
|
|
"signal/advantage_abs_mean": 0.08862931281328201,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08862931281328201,
|
|
"signal/advantage_pre_scale_std": 0.13590119183063507,
|
|
"signal/advantage_std": 0.13590119183063507,
|
|
"signal/brier_reward/centered_abs_mean": 0.18094989657402039,
|
|
"signal/brier_reward/group_std_mean": 0.227994641661644,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022618737071752548,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022618737071752548,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06556902974843978,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07995099425315857,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008196128718554973,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008196128718554973,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417306780815,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002661912888288498,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004251162149012088,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.76482389785815e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.76482389785815e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21462590992450714,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2767761766910553,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21462590992450714,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2767761766910553,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21462590992450714,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2767761766910553,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21462590992450714,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2767761766910553,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21462590992450714,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2767761766910553,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21462590992450714,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2767761766910553,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003841803641989827,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04156382754445076,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05281273275613785,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005195478443056345,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005195478443056345,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30945615354255396,
|
|
"calibration/batch_distribution_entropy": 0.9465401465342558,
|
|
"calibration/buffer_distribution_entropy": 0.9241643371521917,
|
|
"calibration/confidence_entropy": 0.40588373989046644,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.12734375,
|
|
"calibration/coverage@15%": 0.280078125,
|
|
"calibration/coverage@20%": 0.401953125,
|
|
"calibration/coverage@25%": 0.47265625,
|
|
"calibration/coverage@30%": 0.56015625,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.16765182064885026,
|
|
"calibration/mean_confidence": 0.5055874925850048,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 802.2,
|
|
"completions/mean_length": 175.83212890625,
|
|
"completions/mean_terminated_length": 175.16766052246095,
|
|
"completions/min_length": 74.2,
|
|
"completions/min_terminated_length": 74.2,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0009633832960389555,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0016,
|
|
"num_tokens": 234410677.0,
|
|
"reward": 0.990851080417633,
|
|
"reward_std": 0.11021712720394135,
|
|
"rewards/accuracy_reward": 0.5328125,
|
|
"rewards/brier_reward": 0.783275818824768,
|
|
"rewards/confidence_uniqueness_reward": 0.8812341094017029,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0026209069881588222,
|
|
"rewards/frontier_coverage_1": 0.12510305941104888,
|
|
"rewards/frontier_coverage_10": 0.12510305941104888,
|
|
"rewards/frontier_coverage_15": 0.12510305941104888,
|
|
"rewards/frontier_coverage_20": 0.12510305941104888,
|
|
"rewards/frontier_coverage_25": 0.12510305941104888,
|
|
"rewards/frontier_coverage_5": 0.12510305941104888,
|
|
"rewards/frontier_ece_reward": 0.026669794321060182,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12713623046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1714620292186737,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.063568115234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.063568115234375,
|
|
"signal/advantage_abs_mean": 0.08353961855173112,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08353961855173112,
|
|
"signal/advantage_pre_scale_std": 0.13004283159971236,
|
|
"signal/advantage_std": 0.13004283159971236,
|
|
"signal/brier_reward/centered_abs_mean": 0.17861478626728058,
|
|
"signal/brier_reward/group_std_mean": 0.22785739600658417,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022326848283410073,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022326848283410073,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06680140942335129,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08212911635637284,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008350176177918911,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008350176177918911,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.003866990143433213,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028360622934997083,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004303571488708258,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.076551242382266e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.076551242382266e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21149853765964508,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27557849884033203,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21149853765964508,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27557849884033203,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21149853765964508,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27557849884033203,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21149853765964508,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27557849884033203,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21149853765964508,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27557849884033203,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21149853765964508,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27557849884033203,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037858237978070975,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.037810226529836656,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04775542095303535,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004726278316229582,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004726278316229582,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.261170135309063,
|
|
"calibration/batch_distribution_entropy": 0.898169931982497,
|
|
"calibration/buffer_distribution_entropy": 0.9293096435116759,
|
|
"calibration/confidence_entropy": 0.3891703973877817,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.1546875,
|
|
"calibration/coverage@15%": 0.29375,
|
|
"calibration/coverage@20%": 0.36953125,
|
|
"calibration/coverage@25%": 0.490234375,
|
|
"calibration/coverage@30%": 0.64921875,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.14350970052213544,
|
|
"calibration/mean_confidence": 0.5793494293798254,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1348.4,
|
|
"completions/max_terminated_length": 551.0,
|
|
"completions/mean_length": 177.95,
|
|
"completions/mean_terminated_length": 177.28620300292968,
|
|
"completions/min_length": 77.8,
|
|
"completions/min_terminated_length": 77.8,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0010709511116147041,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0017,
|
|
"num_tokens": 251484565.0,
|
|
"reward": 1.0148473739624024,
|
|
"reward_std": 0.11671035438776016,
|
|
"rewards/accuracy_reward": 0.59541015625,
|
|
"rewards/brier_reward": 0.7709134936332702,
|
|
"rewards/confidence_uniqueness_reward": 0.8791958093643188,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0024798931321129204,
|
|
"rewards/frontier_coverage_1": 0.0713951449841261,
|
|
"rewards/frontier_coverage_10": 0.0713951449841261,
|
|
"rewards/frontier_coverage_15": 0.0713951449841261,
|
|
"rewards/frontier_coverage_20": 0.0713951449841261,
|
|
"rewards/frontier_coverage_25": 0.0713951449841261,
|
|
"rewards/frontier_coverage_5": 0.0713951449841261,
|
|
"rewards/frontier_ece_reward": 0.028385131061077117,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.145281982421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.19074405431747438,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.453125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0726409912109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0726409912109375,
|
|
"signal/advantage_abs_mean": 0.09049365520477295,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09049365520477295,
|
|
"signal/advantage_pre_scale_std": 0.13883660733699799,
|
|
"signal/advantage_std": 0.13883660733699799,
|
|
"signal/brier_reward/centered_abs_mean": 0.19185077846050264,
|
|
"signal/brier_reward/group_std_mean": 0.24133287370204926,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02398134730756283,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02398134730756283,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07062419950962066,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08817773014307022,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008828024938702583,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008828024938702583,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032755933701992036,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005048427078872919,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.863312107976526e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.863312107976526e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21241567730903627,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2803816318511963,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21241567730903627,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2803816318511963,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21241567730903627,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2803816318511963,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21241567730903627,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2803816318511963,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21241567730903627,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2803816318511963,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21241567730903627,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2803816318511963,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003802240453660488,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03876788690686226,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04816498681902885,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048459858633577825,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048459858633577825,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2467021288852847,
|
|
"calibration/batch_distribution_entropy": 0.8769867203545791,
|
|
"calibration/buffer_distribution_entropy": 0.9305634576699605,
|
|
"calibration/confidence_entropy": 0.35485692049776835,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.141796875,
|
|
"calibration/coverage@15%": 0.353515625,
|
|
"calibration/coverage@20%": 0.529296875,
|
|
"calibration/coverage@25%": 0.612109375,
|
|
"calibration/coverage@30%": 0.683984375,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.1159896194053629,
|
|
"calibration/mean_confidence": 0.5697380717473134,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 1156.4,
|
|
"completions/max_terminated_length": 519.2,
|
|
"completions/mean_length": 172.75732421875,
|
|
"completions/mean_terminated_length": 171.95858154296874,
|
|
"completions/min_length": 80.0,
|
|
"completions/min_terminated_length": 80.0,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.0012047714553773403,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 268308416.0,
|
|
"reward": 1.0095821619033813,
|
|
"reward_std": 0.10738990157842636,
|
|
"rewards/accuracy_reward": 0.56796875,
|
|
"rewards/brier_reward": 0.7930272340774536,
|
|
"rewards/confidence_uniqueness_reward": 0.8817017793655395,
|
|
"rewards/format_reward": 0.99873046875,
|
|
"rewards/frontier_aurc_reward": -0.0023847362026572227,
|
|
"rewards/frontier_coverage_1": 0.12308522313833237,
|
|
"rewards/frontier_coverage_10": 0.12308522313833237,
|
|
"rewards/frontier_coverage_15": 0.12308522313833237,
|
|
"rewards/frontier_coverage_20": 0.12308522313833237,
|
|
"rewards/frontier_coverage_25": 0.12308522313833237,
|
|
"rewards/frontier_coverage_5": 0.12308522313833237,
|
|
"rewards/frontier_ece_reward": 0.029718055576086044,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1286376953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.16931941509246826,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.515625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06431884765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06431884765625,
|
|
"signal/advantage_abs_mean": 0.08091795295476914,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08091795295476914,
|
|
"signal/advantage_pre_scale_std": 0.13172105848789215,
|
|
"signal/advantage_std": 0.13172105848789215,
|
|
"signal/brier_reward/centered_abs_mean": 0.17564865946769714,
|
|
"signal/brier_reward/group_std_mean": 0.22503041923046113,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021956082433462143,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021956082433462143,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06999158263206481,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08747024983167648,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008748947829008102,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008748947829008102,
|
|
"signal/format_reward/centered_abs_mean": 0.002459716796875,
|
|
"signal/format_reward/group_std_mean": 0.00718155293725431,
|
|
"signal/format_reward/group_zero_std_frac": 0.959375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032616748940199614,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0050237664021551606,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.8383979194331914e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.8383979194331914e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2024263024330139,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26835508942604064,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2024263024330139,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26835508942604064,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2024263024330139,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26835508942604064,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2024263024330139,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26835508942604064,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2024263024330139,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26835508942604064,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2024263024330139,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26835508942604064,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003623430663719773,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.032590297609567644,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0412607304751873,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0040737872011959554,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0040737872011959554,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2642183850892495,
|
|
"calibration/batch_distribution_entropy": 0.9123858748662885,
|
|
"calibration/buffer_distribution_entropy": 0.9324726343473598,
|
|
"calibration/confidence_entropy": 0.3785149112394486,
|
|
"calibration/coverage@0%": 0.012890625,
|
|
"calibration/coverage@1%": 0.012890625,
|
|
"calibration/coverage@10%": 0.116015625,
|
|
"calibration/coverage@15%": 0.3109375,
|
|
"calibration/coverage@20%": 0.408984375,
|
|
"calibration/coverage@25%": 0.5415048617906066,
|
|
"calibration/coverage@30%": 0.6079348091976516,
|
|
"calibration/coverage@5%": 0.012890625,
|
|
"calibration/ece": 0.132531155446651,
|
|
"calibration/mean_confidence": 0.5557964833742884,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1267.4,
|
|
"completions/max_terminated_length": 663.4,
|
|
"completions/mean_length": 179.5677734375,
|
|
"completions/mean_terminated_length": 179.03648986816407,
|
|
"completions/min_length": 84.6,
|
|
"completions/min_terminated_length": 84.6,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0008821667870506644,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 285112886.0,
|
|
"reward": 1.0122202634811401,
|
|
"reward_std": 0.103294475376606,
|
|
"rewards/accuracy_reward": 0.5728515625,
|
|
"rewards/brier_reward": 0.7857265949249268,
|
|
"rewards/confidence_uniqueness_reward": 0.9025682806968689,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.002512221224606037,
|
|
"rewards/frontier_coverage_1": 0.10919135846197606,
|
|
"rewards/frontier_coverage_10": 0.10919135846197606,
|
|
"rewards/frontier_coverage_15": 0.10919135846197606,
|
|
"rewards/frontier_coverage_20": 0.10919135846197606,
|
|
"rewards/frontier_coverage_25": 0.10919135846197606,
|
|
"rewards/frontier_coverage_5": 0.10919135846197606,
|
|
"rewards/frontier_ece_reward": 0.02616579309105873,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12288818359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.15963667035102844,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.061444091796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.061444091796875,
|
|
"signal/advantage_abs_mean": 0.08039057850837708,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08039057850837708,
|
|
"signal/advantage_pre_scale_std": 0.12777341157197952,
|
|
"signal/advantage_std": 0.12777341157197952,
|
|
"signal/brier_reward/centered_abs_mean": 0.17774806618690492,
|
|
"signal/brier_reward/group_std_mean": 0.22448875308036803,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022218508273363115,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022218508273363115,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.055553752928972244,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06775816455483437,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0069442191161215305,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0069442191161215305,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031457245349884033,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004772697854787111,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.630846717394888e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.630846717394888e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20491620600223542,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2665486991405487,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20491620600223542,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2665486991405487,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20491620600223542,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2665486991405487,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20491620600223542,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2665486991405487,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20491620600223542,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2665486991405487,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20491620600223542,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2665486991405487,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003667999850586057,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030647655576467515,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03821772038936615,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038309569470584394,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038309569470584394,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32164312709967285,
|
|
"calibration/batch_distribution_entropy": 0.889030555552942,
|
|
"calibration/buffer_distribution_entropy": 0.9340647660701571,
|
|
"calibration/confidence_entropy": 0.3723696647266723,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.064453125,
|
|
"calibration/coverage@15%": 0.10546875,
|
|
"calibration/coverage@20%": 0.18671875,
|
|
"calibration/coverage@25%": 0.279296875,
|
|
"calibration/coverage@30%": 0.40625,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.14947785675533237,
|
|
"calibration/mean_confidence": 0.5987036054648168,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 419.2,
|
|
"completions/max_terminated_length": 419.2,
|
|
"completions/mean_length": 173.05849609375,
|
|
"completions/mean_terminated_length": 173.05849609375,
|
|
"completions/min_length": 81.6,
|
|
"completions/min_terminated_length": 81.6,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0011444967240095139,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 301843181.0,
|
|
"reward": 1.0008944511413573,
|
|
"reward_std": 0.10447315275669097,
|
|
"rewards/accuracy_reward": 0.551953125,
|
|
"rewards/brier_reward": 0.7695533514022828,
|
|
"rewards/confidence_uniqueness_reward": 0.9145703554153443,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0030466041062027216,
|
|
"rewards/frontier_coverage_1": 0.10940263271331788,
|
|
"rewards/frontier_coverage_10": 0.10940263271331788,
|
|
"rewards/frontier_coverage_15": 0.10940263271331788,
|
|
"rewards/frontier_coverage_20": 0.10940263271331788,
|
|
"rewards/frontier_coverage_25": 0.10940263271331788,
|
|
"rewards/frontier_coverage_5": 0.10940263271331788,
|
|
"rewards/frontier_ece_reward": 0.022437838092446327,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12567138671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.16594321131706238,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.525,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062835693359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062835693359375,
|
|
"signal/advantage_abs_mean": 0.07972771823406219,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07972771823406219,
|
|
"signal/advantage_pre_scale_std": 0.12657924741506577,
|
|
"signal/advantage_std": 0.12657924741506577,
|
|
"signal/brier_reward/centered_abs_mean": 0.1829427719116211,
|
|
"signal/brier_reward/group_std_mean": 0.23203744888305664,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022867846488952636,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022867846488952636,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.050545477867126466,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06067455634474754,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006318184733390808,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006318184733390808,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034512685146182776,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00543216560035944,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.177770264912397e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.177770264912397e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20699847042560576,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2705923795700073,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20699847042560576,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2705923795700073,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20699847042560576,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2705923795700073,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20699847042560576,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2705923795700073,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20699847042560576,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2705923795700073,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20699847042560576,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2705923795700073,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037052724044770004,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.029942670091986655,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.037430693954229356,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003742833761498332,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003742833761498332,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21649904704687412,
|
|
"calibration/batch_distribution_entropy": 0.8802667061478353,
|
|
"calibration/buffer_distribution_entropy": 0.9338496829298666,
|
|
"calibration/confidence_entropy": 0.3712671385467933,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.026953125,
|
|
"calibration/coverage@15%": 0.2890625,
|
|
"calibration/coverage@20%": 0.645703125,
|
|
"calibration/coverage@25%": 0.769140625,
|
|
"calibration/coverage@30%": 0.862109375,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.11950063296604121,
|
|
"calibration/mean_confidence": 0.6085048357839588,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 773.0,
|
|
"completions/max_terminated_length": 563.2,
|
|
"completions/mean_length": 173.11982421875,
|
|
"completions/mean_terminated_length": 172.98677978515624,
|
|
"completions/min_length": 78.2,
|
|
"completions/min_terminated_length": 78.2,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0012024985626339912,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 318545880.0,
|
|
"reward": 1.0252812027931213,
|
|
"reward_std": 0.10164082497358322,
|
|
"rewards/accuracy_reward": 0.5994140625,
|
|
"rewards/brier_reward": 0.7789253473281861,
|
|
"rewards/confidence_uniqueness_reward": 0.9277184486389161,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0023648647125810384,
|
|
"rewards/frontier_coverage_1": 0.0874197174794972,
|
|
"rewards/frontier_coverage_10": 0.0874197174794972,
|
|
"rewards/frontier_coverage_15": 0.0874197174794972,
|
|
"rewards/frontier_coverage_20": 0.0874197174794972,
|
|
"rewards/frontier_coverage_25": 0.0874197174794972,
|
|
"rewards/frontier_coverage_5": 0.0874197174794972,
|
|
"rewards/frontier_ece_reward": 0.02356785014271736,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12322998046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1658935070037842,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.515625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.061614990234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.061614990234375,
|
|
"signal/advantage_abs_mean": 0.07720276862382888,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07720276862382888,
|
|
"signal/advantage_pre_scale_std": 0.12322989106178284,
|
|
"signal/advantage_std": 0.12322989106178284,
|
|
"signal/brier_reward/centered_abs_mean": 0.17541297674179077,
|
|
"signal/brier_reward/group_std_mean": 0.22342281341552733,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021926622092723846,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021926622092723846,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0434452086687088,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05217555984854698,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054306510835886,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054306510835886,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002743481518700719,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004372889362275601,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.910831557936035e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.910831557936035e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2050688624382019,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27132690250873565,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2050688624382019,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27132690250873565,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2050688624382019,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27132690250873565,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2050688624382019,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27132690250873565,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2050688624382019,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27132690250873565,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2050688624382019,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27132690250873565,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036707324907183647,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.028253377601504325,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0347771979868412,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035316722001880406,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035316722001880406,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2098873289679891,
|
|
"calibration/batch_distribution_entropy": 0.8840538637302338,
|
|
"calibration/buffer_distribution_entropy": 0.9334977967028385,
|
|
"calibration/confidence_entropy": 0.3802012123632416,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.153515625,
|
|
"calibration/coverage@15%": 0.375,
|
|
"calibration/coverage@20%": 0.535546875,
|
|
"calibration/coverage@25%": 0.64609375,
|
|
"calibration/coverage@30%": 0.844921875,
|
|
"calibration/coverage@5%": 0.00859375,
|
|
"calibration/ece": 0.1021149372829088,
|
|
"calibration/mean_confidence": 0.6126254173994252,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 759.6,
|
|
"completions/max_terminated_length": 534.4,
|
|
"completions/mean_length": 171.9232421875,
|
|
"completions/mean_terminated_length": 171.7903839111328,
|
|
"completions/min_length": 77.6,
|
|
"completions/min_terminated_length": 77.6,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.000909713504370302,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 335395078.0,
|
|
"reward": 1.0206497073173524,
|
|
"reward_std": 0.0881238341331482,
|
|
"rewards/accuracy_reward": 0.577734375,
|
|
"rewards/brier_reward": 0.7934822678565979,
|
|
"rewards/confidence_uniqueness_reward": 0.9379700899124146,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.00240150757599622,
|
|
"rewards/frontier_coverage_1": 0.11633307486772537,
|
|
"rewards/frontier_coverage_10": 0.11633307486772537,
|
|
"rewards/frontier_coverage_15": 0.11633307486772537,
|
|
"rewards/frontier_coverage_20": 0.11633307486772537,
|
|
"rewards/frontier_coverage_25": 0.11633307486772537,
|
|
"rewards/frontier_coverage_5": 0.11633307486772537,
|
|
"rewards/frontier_ece_reward": 0.023979850485920905,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09036865234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12911975234746934,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045184326171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045184326171875,
|
|
"signal/advantage_abs_mean": 0.06487039029598236,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06487039029598236,
|
|
"signal/advantage_pre_scale_std": 0.11023673117160797,
|
|
"signal/advantage_std": 0.11023673117160797,
|
|
"signal/brier_reward/centered_abs_mean": 0.16053855717182158,
|
|
"signal/brier_reward/group_std_mean": 0.20720755457878112,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020067319646477698,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020067319646477698,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037102106213569644,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04544886723160744,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046377632766962055,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046377632766962055,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025523790158331395,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004083980154246092,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.568758231471293e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.568758231471293e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18334722518920898,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2422294318675995,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18334722518920898,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2422294318675995,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18334722518920898,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2422294318675995,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18334722518920898,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2422294318675995,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18334722518920898,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2422294318675995,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18334722518920898,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2422294318675995,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003281915234401822,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.025719008594751357,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03179643303155899,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032148760743439197,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032148760743439197,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.5391036516427652,
|
|
"eval_calibration/batch_distribution_entropy": 0.896161072633235,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9332897449793491,
|
|
"eval_calibration/confidence_entropy": 0.3928585129324643,
|
|
"eval_calibration/coverage@0%": 0.0234375,
|
|
"eval_calibration/coverage@1%": 0.0234375,
|
|
"eval_calibration/coverage@10%": 0.0234375,
|
|
"eval_calibration/coverage@15%": 0.0234375,
|
|
"eval_calibration/coverage@20%": 0.0234375,
|
|
"eval_calibration/coverage@25%": 0.0546875,
|
|
"eval_calibration/coverage@30%": 0.1328125,
|
|
"eval_calibration/coverage@5%": 0.0234375,
|
|
"eval_calibration/ece": 0.31548437500000004,
|
|
"eval_calibration/mean_confidence": 0.5214375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 318.0,
|
|
"eval_completions/max_terminated_length": 318.0,
|
|
"eval_completions/mean_length": 174.37715530395508,
|
|
"eval_completions/mean_terminated_length": 174.37715530395508,
|
|
"eval_completions/min_length": 94.0,
|
|
"eval_completions/min_terminated_length": 94.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 335395078.0,
|
|
"eval_reward": 0.9348780065774918,
|
|
"eval_reward_std": 0.2365853264927864,
|
|
"eval_rewards/accuracy_reward": 0.400390625,
|
|
"eval_rewards/brier_reward": 0.7740589827299118,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.888916015625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0038473134045489132,
|
|
"eval_rewards/frontier_coverage_1": 0.23241731896996498,
|
|
"eval_rewards/frontier_coverage_10": 0.23241731896996498,
|
|
"eval_rewards/frontier_coverage_15": 0.23241731896996498,
|
|
"eval_rewards/frontier_coverage_20": 0.23241731896996498,
|
|
"eval_rewards/frontier_coverage_25": 0.23241731896996498,
|
|
"eval_rewards/frontier_coverage_5": 0.23241731896996498,
|
|
"eval_rewards/frontier_ece_reward": 0.015344643266871572,
|
|
"eval_runtime": 18.5787,
|
|
"eval_samples_per_second": 26.913,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4688720703125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49159620702266693,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23443603515625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23443603515625,
|
|
"eval_signal/advantage_abs_mean": 0.21387740224599838,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21387740224599838,
|
|
"eval_signal/advantage_pre_scale_std": 0.23405225947499275,
|
|
"eval_signal/advantage_std": 0.23405225947499275,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.25311582535505295,
|
|
"eval_signal/brier_reward/group_std_mean": 0.31092750281095505,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03163947816938162,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.03163947816938162,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.051788330078125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06362179387360811,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006473541259765625,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006473541259765625,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004530601087026298,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0076346289133653045,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.109775626508053e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.109775626508053e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.37415503710508347,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.45467519015073776,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.37415503710508347,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.45467519015073776,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.37415503710508347,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.45467519015073776,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.37415503710508347,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.45467519015073776,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.37415503710508347,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.45467519015073776,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.37415503710508347,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.45467519015073776,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006697374978102744,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.035073693841695786,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.046107963658869267,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004384211730211973,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004384211730211973,
|
|
"eval_steps_per_second": 0.215,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"step": 100,
|
|
"train_probe_calibration/aurc": 0.2376980377133199,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.8460000670916705,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.9334354664119227,
|
|
"train_probe_calibration/confidence_entropy": 0.3949835291522682,
|
|
"train_probe_calibration/coverage@0%": 0.140625,
|
|
"train_probe_calibration/coverage@1%": 0.140625,
|
|
"train_probe_calibration/coverage@10%": 0.1953125,
|
|
"train_probe_calibration/coverage@15%": 0.28125,
|
|
"train_probe_calibration/coverage@20%": 0.5078125,
|
|
"train_probe_calibration/coverage@25%": 0.609375,
|
|
"train_probe_calibration/coverage@30%": 0.703125,
|
|
"train_probe_calibration/coverage@5%": 0.140625,
|
|
"train_probe_calibration/ece": 0.19056250000000002,
|
|
"train_probe_calibration/mean_confidence": 0.5500625,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 351.25,
|
|
"train_probe_completions/max_terminated_length": 351.25,
|
|
"train_probe_completions/mean_length": 172.39978408813477,
|
|
"train_probe_completions/mean_terminated_length": 172.39978408813477,
|
|
"train_probe_completions/min_length": 90.25,
|
|
"train_probe_completions/min_terminated_length": 90.25,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 335395078.0,
|
|
"train_probe_reward": 1.0273381769657135,
|
|
"train_probe_reward_std": 0.2266932986676693,
|
|
"train_probe_rewards/accuracy_reward": 0.60546875,
|
|
"train_probe_rewards/brier_reward": 0.7994071692228317,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.88720703125,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0018720118096098304,
|
|
"train_probe_rewards/frontier_coverage_1": 0.09955346956849098,
|
|
"train_probe_rewards/frontier_coverage_10": 0.09955346956849098,
|
|
"train_probe_rewards/frontier_coverage_15": 0.09955346956849098,
|
|
"train_probe_rewards/frontier_coverage_20": 0.09955346956849098,
|
|
"train_probe_rewards/frontier_coverage_25": 0.09955346956849098,
|
|
"train_probe_rewards/frontier_coverage_5": 0.09955346956849098,
|
|
"train_probe_rewards/frontier_ece_reward": 0.024947880767285824,
|
|
"train_probe_runtime": 19.5457,
|
|
"train_probe_samples_per_second": 25.581,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.468505859375,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.49154773354530334,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2342529296875,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2342529296875,
|
|
"train_probe_signal/advantage_abs_mean": 0.20661941543221474,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20661941543221474,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.2241741679608822,
|
|
"train_probe_signal/advantage_std": 0.2241741679608822,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.231151282787323,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.2955388203263283,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028893910348415375,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.028893910348415375,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0560455322265625,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0693097673356533,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070056915283203125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070056915283203125,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0028542151558212936,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.005022436263971031,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.109044832352083e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.109044832352083e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3502242639660835,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4751490503549576,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3502242639660835,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.4751490503549576,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3502242639660835,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.4751490503549576,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3502242639660835,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.4751490503549576,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3502242639660835,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.4751490503549576,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3502242639660835,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4751490503549576,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006269014091230929,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.034564562141895294,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.043938882648944855,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004320570267736912,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004320570267736912,
|
|
"train_probe_steps_per_second": 0.205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26481606899320165,
|
|
"calibration/batch_distribution_entropy": 0.911049416340618,
|
|
"calibration/buffer_distribution_entropy": 0.9370308540267022,
|
|
"calibration/confidence_entropy": 0.3856464288010409,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.094140625,
|
|
"calibration/coverage@15%": 0.33984375,
|
|
"calibration/coverage@20%": 0.397265625,
|
|
"calibration/coverage@25%": 0.46796875,
|
|
"calibration/coverage@30%": 0.61328125,
|
|
"calibration/coverage@5%": 0.05859375,
|
|
"calibration/ece": 0.1609574228715549,
|
|
"calibration/mean_confidence": 0.5372468384372941,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 650.6,
|
|
"completions/max_terminated_length": 446.6,
|
|
"completions/mean_length": 171.29970703125,
|
|
"completions/mean_terminated_length": 171.1662384033203,
|
|
"completions/min_length": 78.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0009579297038726509,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 351871619.0,
|
|
"reward": 1.026773464679718,
|
|
"reward_std": 0.0938282698392868,
|
|
"rewards/accuracy_reward": 0.58876953125,
|
|
"rewards/brier_reward": 0.7977856278419495,
|
|
"rewards/confidence_uniqueness_reward": 0.9435371160507202,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0020208923146128655,
|
|
"rewards/frontier_coverage_1": 0.1127518393099308,
|
|
"rewards/frontier_coverage_10": 0.1127518393099308,
|
|
"rewards/frontier_coverage_15": 0.1127518393099308,
|
|
"rewards/frontier_coverage_20": 0.1127518393099308,
|
|
"rewards/frontier_coverage_25": 0.1127518393099308,
|
|
"rewards/frontier_coverage_5": 0.1127518393099308,
|
|
"rewards/frontier_ece_reward": 0.021590472385287284,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.114825439453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1522398740053177,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.565625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574127197265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0574127197265625,
|
|
"signal/advantage_abs_mean": 0.07198196649551392,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07198196649551392,
|
|
"signal/advantage_pre_scale_std": 0.11692783534526825,
|
|
"signal/advantage_std": 0.11692783534526825,
|
|
"signal/brier_reward/centered_abs_mean": 0.16734021306037902,
|
|
"signal/brier_reward/group_std_mean": 0.21267004311084747,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020917526632547378,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020917526632547378,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03207938522100449,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04001001343131065,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004009923152625561,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004009923152625561,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021244912641122937,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033777955919504165,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.802839128184132e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.802839128184132e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20386078357696533,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2637325257062912,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20386078357696533,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2637325257062912,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20386078357696533,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2637325257062912,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20386078357696533,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2637325257062912,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20386078357696533,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2637325257062912,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20386078357696533,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2637325257062912,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036491078790277243,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.022904927283525466,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.028568074852228165,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002863115910440683,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002863115910440683,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2918670757064623,
|
|
"calibration/batch_distribution_entropy": 0.8617419290524296,
|
|
"calibration/buffer_distribution_entropy": 0.9459875684518371,
|
|
"calibration/confidence_entropy": 0.3511704141965763,
|
|
"calibration/coverage@0%": 0.0125,
|
|
"calibration/coverage@1%": 0.0125,
|
|
"calibration/coverage@10%": 0.226953125,
|
|
"calibration/coverage@15%": 0.337890625,
|
|
"calibration/coverage@20%": 0.399609375,
|
|
"calibration/coverage@25%": 0.4484375,
|
|
"calibration/coverage@30%": 0.512109375,
|
|
"calibration/coverage@5%": 0.0890625,
|
|
"calibration/ece": 0.12058771609022725,
|
|
"calibration/mean_confidence": 0.45003809801340733,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 635.8,
|
|
"completions/max_terminated_length": 446.2,
|
|
"completions/mean_length": 170.15615234375,
|
|
"completions/mean_terminated_length": 170.0229278564453,
|
|
"completions/min_length": 75.2,
|
|
"completions/min_terminated_length": 75.2,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.0009440815192647278,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 368874434.0,
|
|
"reward": 0.9951952695846558,
|
|
"reward_std": 0.08828350454568863,
|
|
"rewards/accuracy_reward": 0.52373046875,
|
|
"rewards/brier_reward": 0.784073281288147,
|
|
"rewards/confidence_uniqueness_reward": 0.9385409832000733,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0024972121231257916,
|
|
"rewards/frontier_coverage_1": 0.15070441961288453,
|
|
"rewards/frontier_coverage_10": 0.15070441961288453,
|
|
"rewards/frontier_coverage_15": 0.15070441961288453,
|
|
"rewards/frontier_coverage_20": 0.15070441961288453,
|
|
"rewards/frontier_coverage_25": 0.15070441961288453,
|
|
"rewards/frontier_coverage_5": 0.15070441961288453,
|
|
"rewards/frontier_ece_reward": 0.015288973599672318,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.105633544921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14147602766752243,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0528167724609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0528167724609375,
|
|
"signal/advantage_abs_mean": 0.06714115589857102,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06714115589857102,
|
|
"signal/advantage_pre_scale_std": 0.1110717460513115,
|
|
"signal/advantage_std": 0.1110717460513115,
|
|
"signal/brier_reward/centered_abs_mean": 0.1665810763835907,
|
|
"signal/brier_reward/group_std_mean": 0.21240653693675995,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02082263454794884,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02082263454794884,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03552674874663353,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0450509749352932,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004440843593329191,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004440843593329191,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002249357360415161,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003604770079255104,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.026349415653385e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.026349415653385e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2058982342481613,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2676191568374634,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2058982342481613,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2676191568374634,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2058982342481613,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2676191568374634,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2058982342481613,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2676191568374634,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2058982342481613,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2676191568374634,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2058982342481613,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2676191568374634,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003685578191652894,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.018955815210938455,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.023499416932463647,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002369476901367307,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002369476901367307,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26006198370363565,
|
|
"calibration/batch_distribution_entropy": 0.8909601189119198,
|
|
"calibration/buffer_distribution_entropy": 0.9543928456982405,
|
|
"calibration/confidence_entropy": 0.3737341235551185,
|
|
"calibration/coverage@0%": 0.01603626467710372,
|
|
"calibration/coverage@1%": 0.01603626467710372,
|
|
"calibration/coverage@10%": 0.20185680650684928,
|
|
"calibration/coverage@15%": 0.28745031188845405,
|
|
"calibration/coverage@20%": 0.4328125,
|
|
"calibration/coverage@25%": 0.584765625,
|
|
"calibration/coverage@30%": 0.668359375,
|
|
"calibration/coverage@5%": 0.05321826076320939,
|
|
"calibration/ece": 0.12706476116156798,
|
|
"calibration/mean_confidence": 0.5286066296783638,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 687.6,
|
|
"completions/max_terminated_length": 478.2,
|
|
"completions/mean_length": 167.1548828125,
|
|
"completions/mean_terminated_length": 167.02129821777345,
|
|
"completions/min_length": 76.8,
|
|
"completions/min_terminated_length": 76.8,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0009902457240968943,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 385651572.0,
|
|
"reward": 1.0256622314453125,
|
|
"reward_std": 0.08201654255390167,
|
|
"rewards/accuracy_reward": 0.57900390625,
|
|
"rewards/brier_reward": 0.8139191031455993,
|
|
"rewards/confidence_uniqueness_reward": 0.9448665976524353,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0020156882936134936,
|
|
"rewards/frontier_coverage_1": 0.13771790713071824,
|
|
"rewards/frontier_coverage_10": 0.13771790713071824,
|
|
"rewards/frontier_coverage_15": 0.13771790713071824,
|
|
"rewards/frontier_coverage_20": 0.13771790713071824,
|
|
"rewards/frontier_coverage_25": 0.12578624486923218,
|
|
"rewards/frontier_coverage_5": 0.13771790713071824,
|
|
"rewards/frontier_ece_reward": 0.015338649787008763,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.090032958984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12576899230480193,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0450164794921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0450164794921875,
|
|
"signal/advantage_abs_mean": 0.06141816526651382,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06141816526651382,
|
|
"signal/advantage_pre_scale_std": 0.10529383420944213,
|
|
"signal/advantage_std": 0.10529383420944213,
|
|
"signal/brier_reward/centered_abs_mean": 0.15247377157211303,
|
|
"signal/brier_reward/group_std_mean": 0.19677919149398804,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01905922144651413,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01905922144651413,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030233245342969894,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.039144163578748704,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003779155667871237,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003779155667871237,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021210510516539217,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034853797405958176,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.796681339736096e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.796681339736096e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18898755609989165,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24688771665096282,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18898755609989165,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24688771665096282,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18898755609989165,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24688771665096282,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18898755609989165,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24688771665096282,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16050014942884444,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21017540693283082,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002872952586039901,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002872952586039901,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18898755609989165,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24688771665096282,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033828773070126773,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015096320770680904,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01860468164086342,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001887040096335113,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001887040096335113,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24020855256031118,
|
|
"calibration/batch_distribution_entropy": 0.8819246681337859,
|
|
"calibration/buffer_distribution_entropy": 0.9595033013104548,
|
|
"calibration/confidence_entropy": 0.36571512554246827,
|
|
"calibration/coverage@0%": 0.01640625,
|
|
"calibration/coverage@1%": 0.01640625,
|
|
"calibration/coverage@10%": 0.266015625,
|
|
"calibration/coverage@15%": 0.35078125,
|
|
"calibration/coverage@20%": 0.422265625,
|
|
"calibration/coverage@25%": 0.48203125,
|
|
"calibration/coverage@30%": 0.680859375,
|
|
"calibration/coverage@5%": 0.18125,
|
|
"calibration/ece": 0.13183292817767742,
|
|
"calibration/mean_confidence": 0.4883596199957732,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 977.6,
|
|
"completions/max_terminated_length": 643.6,
|
|
"completions/mean_length": 162.63994140625,
|
|
"completions/mean_terminated_length": 162.3724334716797,
|
|
"completions/min_length": 72.4,
|
|
"completions/min_terminated_length": 72.4,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0009414847008883953,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 402173517.0,
|
|
"reward": 1.028240156173706,
|
|
"reward_std": 0.08490664958953857,
|
|
"rewards/accuracy_reward": 0.5876953125,
|
|
"rewards/brier_reward": 0.8151444077491761,
|
|
"rewards/confidence_uniqueness_reward": 0.9464893937110901,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002180484868586063,
|
|
"rewards/frontier_coverage_1": 0.12647273913025855,
|
|
"rewards/frontier_coverage_10": 0.12647273913025855,
|
|
"rewards/frontier_coverage_15": 0.12647273913025855,
|
|
"rewards/frontier_coverage_20": 0.12413697615265847,
|
|
"rewards/frontier_coverage_25": 0.08527236208319663,
|
|
"rewards/frontier_coverage_5": 0.12647273913025855,
|
|
"rewards/frontier_ece_reward": 0.012559224478900432,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10203857421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.139366614818573,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051019287109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051019287109375,
|
|
"signal/advantage_abs_mean": 0.06343221440911292,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06343221440911292,
|
|
"signal/advantage_pre_scale_std": 0.11096447557210923,
|
|
"signal/advantage_std": 0.11096447557210923,
|
|
"signal/brier_reward/centered_abs_mean": 0.14464540481567384,
|
|
"signal/brier_reward/group_std_mean": 0.18787792026996614,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01808067560195923,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01808067560195923,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02793549485504627,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0359924353659153,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003491936856880784,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003491936856880784,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002523245778866112,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0043451421894133094,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5166096970206124e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5166096970206124e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17531461119651795,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2309555232524872,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031381313689053058,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031381313689053058,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17531461119651795,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2309555232524872,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031381313689053058,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031381313689053058,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17531461119651795,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2309555232524872,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031381313689053058,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031381313689053058,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17357857525348663,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22865375578403474,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003107056301087141,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003107056301087141,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10947014093399048,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14513879716396333,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019595154793933035,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019595154793933035,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17531461119651795,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2309555232524872,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031381313689053058,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031381313689053058,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01200645174831152,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01493366789072752,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00150080646853894,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00150080646853894,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2844994718888988,
|
|
"calibration/batch_distribution_entropy": 0.9237969701468984,
|
|
"calibration/buffer_distribution_entropy": 0.9619282325349795,
|
|
"calibration/confidence_entropy": 0.4142039763975651,
|
|
"calibration/coverage@0%": 0.011721043297455968,
|
|
"calibration/coverage@1%": 0.011721043297455968,
|
|
"calibration/coverage@10%": 0.03597572162426614,
|
|
"calibration/coverage@15%": 0.10791034735812133,
|
|
"calibration/coverage@20%": 0.3302172517123288,
|
|
"calibration/coverage@25%": 0.4877041034735813,
|
|
"calibration/coverage@30%": 0.5815114359099804,
|
|
"calibration/coverage@5%": 0.032460096624266147,
|
|
"calibration/ece": 0.1442927572230528,
|
|
"calibration/mean_confidence": 0.5743742787719365,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 617.0,
|
|
"completions/max_terminated_length": 428.4,
|
|
"completions/mean_length": 160.984765625,
|
|
"completions/mean_terminated_length": 160.8504638671875,
|
|
"completions/min_length": 76.0,
|
|
"completions/min_terminated_length": 76.0,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.001233375514857471,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 418858449.0,
|
|
"reward": 1.0142970323562621,
|
|
"reward_std": 0.09367316514253617,
|
|
"rewards/accuracy_reward": 0.56884765625,
|
|
"rewards/brier_reward": 0.7930649518966675,
|
|
"rewards/confidence_uniqueness_reward": 0.9494104027748108,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002801778074353933,
|
|
"rewards/frontier_coverage_1": 0.111895702034235,
|
|
"rewards/frontier_coverage_10": 0.111895702034235,
|
|
"rewards/frontier_coverage_15": 0.111895702034235,
|
|
"rewards/frontier_coverage_20": 0.09835360199213028,
|
|
"rewards/frontier_coverage_25": 0.07011410780251026,
|
|
"rewards/frontier_coverage_5": 0.111895702034235,
|
|
"rewards/frontier_ece_reward": 0.009083650819957257,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.118499755859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.16050875782966614,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.525,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0592498779296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0592498779296875,
|
|
"signal/advantage_abs_mean": 0.0702929750084877,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0702929750084877,
|
|
"signal/advantage_pre_scale_std": 0.12010517567396164,
|
|
"signal/advantage_std": 0.12010517567396164,
|
|
"signal/brier_reward/centered_abs_mean": 0.15167818665504457,
|
|
"signal/brier_reward/group_std_mean": 0.19488880634307862,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01895977333188057,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01895977333188057,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026428508386015893,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03375169932842255,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033035635482519866,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033035635482519866,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002832173928618431,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004560053441673517,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.069591134088114e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.069591134088114e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1735062450170517,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22942977547645568,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003105761716142297,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003105761716142297,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1735062450170517,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22942977547645568,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003105761716142297,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003105761716142297,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1735062450170517,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22942977547645568,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003105761716142297,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003105761716142297,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14573751091957093,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19352927505970002,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026087014470249415,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026087014470249415,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08992226272821427,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11867372989654541,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016096084378659724,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016096084378659724,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1735062450170517,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22942977547645568,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003105761716142297,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003105761716142297,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010532907396554946,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013023488782346248,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013166134245693683,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013166134245693683,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27468807423750957,
|
|
"calibration/batch_distribution_entropy": 0.9489113413581535,
|
|
"calibration/buffer_distribution_entropy": 0.9616886407581575,
|
|
"calibration/confidence_entropy": 0.44173120087722995,
|
|
"calibration/coverage@0%": 0.008597572162426615,
|
|
"calibration/coverage@1%": 0.008597572162426615,
|
|
"calibration/coverage@10%": 0.1097694471624266,
|
|
"calibration/coverage@15%": 0.18712007705479453,
|
|
"calibration/coverage@20%": 0.27618334148727985,
|
|
"calibration/coverage@25%": 0.4500871453033268,
|
|
"calibration/coverage@30%": 0.6063937133072408,
|
|
"calibration/coverage@5%": 0.032816322162426616,
|
|
"calibration/ece": 0.10054416984275531,
|
|
"calibration/mean_confidence": 0.5599183650598022,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 699.2,
|
|
"completions/max_terminated_length": 499.0,
|
|
"completions/mean_length": 161.9859375,
|
|
"completions/mean_terminated_length": 161.85264892578124,
|
|
"completions/min_length": 76.2,
|
|
"completions/min_terminated_length": 76.2,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0010082739172503352,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 435398369.0,
|
|
"reward": 1.0111759543418883,
|
|
"reward_std": 0.0877716675400734,
|
|
"rewards/accuracy_reward": 0.5583984375,
|
|
"rewards/brier_reward": 0.7999887347221375,
|
|
"rewards/confidence_uniqueness_reward": 0.9553175568580627,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002307292679324746,
|
|
"rewards/frontier_coverage_1": 0.11955121904611588,
|
|
"rewards/frontier_coverage_10": 0.11955121904611588,
|
|
"rewards/frontier_coverage_15": 0.11955121904611588,
|
|
"rewards/frontier_coverage_20": 0.10541392564773559,
|
|
"rewards/frontier_coverage_25": 0.0678506538271904,
|
|
"rewards/frontier_coverage_5": 0.11955121904611588,
|
|
"rewards/frontier_ece_reward": 0.00793801536783576,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1123291015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1460072174668312,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.59375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05616455078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05616455078125,
|
|
"signal/advantage_abs_mean": 0.06770573481917382,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06770573481917382,
|
|
"signal/advantage_pre_scale_std": 0.11403226554393768,
|
|
"signal/advantage_std": 0.11403226554393768,
|
|
"signal/brier_reward/centered_abs_mean": 0.15031678080558777,
|
|
"signal/brier_reward/group_std_mean": 0.19109582304954528,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018789597600698472,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018789597600698472,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021022913232445716,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027267256006598472,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026278641540557145,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026278641540557145,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021706197410821916,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003645438142120838,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.885409387294203e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.885409387294203e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18729844689369202,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24028244614601135,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003352642059326172,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003352642059326172,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18729844689369202,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24028244614601135,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003352642059326172,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003352642059326172,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18729844689369202,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24028244614601135,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003352642059326172,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003352642059326172,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1524550050497055,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19646928310394288,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027289445977658035,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027289445977658035,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08746959716081619,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11204418540000916,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001565705775283277,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001565705775283277,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18729844689369202,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24028244614601135,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003352642059326172,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003352642059326172,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00957362912595272,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01186007559299469,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00119670364074409,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00119670364074409,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2170116955736013,
|
|
"calibration/batch_distribution_entropy": 0.920250529181654,
|
|
"calibration/buffer_distribution_entropy": 0.9593461850745699,
|
|
"calibration/confidence_entropy": 0.41075450223345805,
|
|
"calibration/coverage@0%": 0.03126758194716243,
|
|
"calibration/coverage@1%": 0.03126758194716243,
|
|
"calibration/coverage@10%": 0.30654583537181995,
|
|
"calibration/coverage@15%": 0.4034712879158513,
|
|
"calibration/coverage@20%": 0.49061888454011743,
|
|
"calibration/coverage@25%": 0.6015625,
|
|
"calibration/coverage@30%": 0.695703125,
|
|
"calibration/coverage@5%": 0.22365536325831706,
|
|
"calibration/ece": 0.12825262002892726,
|
|
"calibration/mean_confidence": 0.5729583550437628,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 861.4,
|
|
"completions/max_terminated_length": 403.8,
|
|
"completions/mean_length": 161.29306640625,
|
|
"completions/mean_terminated_length": 161.02431030273436,
|
|
"completions/min_length": 72.0,
|
|
"completions/min_terminated_length": 72.0,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0009220021311193705,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 452064346.0,
|
|
"reward": 1.0282735109329224,
|
|
"reward_std": 0.07874491959810256,
|
|
"rewards/accuracy_reward": 0.59111328125,
|
|
"rewards/brier_reward": 0.8130090713500977,
|
|
"rewards/confidence_uniqueness_reward": 0.952438759803772,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0019511275691911577,
|
|
"rewards/frontier_coverage_1": 0.1167385719716549,
|
|
"rewards/frontier_coverage_10": 0.1167385719716549,
|
|
"rewards/frontier_coverage_15": 0.1167385719716549,
|
|
"rewards/frontier_coverage_20": 0.09187164157629013,
|
|
"rewards/frontier_coverage_25": 0.06807960644364357,
|
|
"rewards/frontier_coverage_5": 0.1167385719716549,
|
|
"rewards/frontier_ece_reward": 0.00796552887186408,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.100054931640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12813358157873153,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0500274658203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0500274658203125,
|
|
"signal/advantage_abs_mean": 0.060691606253385544,
|
|
"signal/advantage_pre_scale_abs_mean": 0.060691606253385544,
|
|
"signal/advantage_pre_scale_std": 0.10658708661794662,
|
|
"signal/advantage_std": 0.10658708661794662,
|
|
"signal/brier_reward/centered_abs_mean": 0.1367792531847954,
|
|
"signal/brier_reward/group_std_mean": 0.17578783333301545,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017097406648099424,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017097406648099424,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023115601390600204,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03054891638457775,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028894501738250256,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028894501738250256,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018829480512067675,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031504146289080383,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3704767702147365e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3704767702147365e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17200512290000916,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22458739280700685,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030788916628807783,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030788916628807783,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17200512290000916,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22458739280700685,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030788916628807783,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030788916628807783,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17200512290000916,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22458739280700685,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030788916628807783,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030788916628807783,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12090182155370713,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15927328169345856,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002164142485707998,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002164142485707998,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07504236698150635,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09683282524347306,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013432582607492804,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013432582607492804,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17200512290000916,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22458739280700685,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030788916628807783,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030788916628807783,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008515550382435321,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01071312427520752,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010644437978044152,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010644437978044152,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2344617037580517,
|
|
"calibration/batch_distribution_entropy": 0.9158115214558646,
|
|
"calibration/buffer_distribution_entropy": 0.9532071558973751,
|
|
"calibration/confidence_entropy": 0.43419576863505027,
|
|
"calibration/coverage@0%": 0.011328125,
|
|
"calibration/coverage@1%": 0.011328125,
|
|
"calibration/coverage@10%": 0.094921875,
|
|
"calibration/coverage@15%": 0.19296875,
|
|
"calibration/coverage@20%": 0.3484375,
|
|
"calibration/coverage@25%": 0.60390625,
|
|
"calibration/coverage@30%": 0.79140625,
|
|
"calibration/coverage@5%": 0.0140625,
|
|
"calibration/ece": 0.11601793193579395,
|
|
"calibration/mean_confidence": 0.6193143780017666,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 448.6,
|
|
"completions/max_terminated_length": 448.6,
|
|
"completions/mean_length": 164.63623046875,
|
|
"completions/mean_terminated_length": 164.63623046875,
|
|
"completions/min_length": 81.6,
|
|
"completions/min_terminated_length": 81.6,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.0026026626583188772,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 468703021.0,
|
|
"reward": 1.0116289138793946,
|
|
"reward_std": 0.08069856613874435,
|
|
"rewards/accuracy_reward": 0.55439453125,
|
|
"rewards/brier_reward": 0.8098321080207824,
|
|
"rewards/confidence_uniqueness_reward": 0.9554630517959595,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0024544465355575084,
|
|
"rewards/frontier_coverage_1": 0.1361882671713829,
|
|
"rewards/frontier_coverage_10": 0.1361882671713829,
|
|
"rewards/frontier_coverage_15": 0.1361882671713829,
|
|
"rewards/frontier_coverage_20": 0.10684386640787125,
|
|
"rewards/frontier_coverage_25": 0.07347736358642579,
|
|
"rewards/frontier_coverage_5": 0.1361882671713829,
|
|
"rewards/frontier_ece_reward": 0.00746011808514595,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.095538330078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1277057021856308,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477691650390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0477691650390625,
|
|
"signal/advantage_abs_mean": 0.06105227619409561,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06105227619409561,
|
|
"signal/advantage_pre_scale_std": 0.10724145770072938,
|
|
"signal/advantage_std": 0.10724145770072938,
|
|
"signal/brier_reward/centered_abs_mean": 0.13868603557348252,
|
|
"signal/brier_reward/group_std_mean": 0.178034707903862,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017335754446685316,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017335754446685316,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0210552129894495,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027469881996512412,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026319016236811877,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026319016236811877,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002177398931235075,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003708243044093251,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.897543938364833e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.897543938364833e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17191272974014282,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22308169305324554,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030772378202527763,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030772378202527763,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17191272974014282,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22308169305324554,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030772378202527763,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030772378202527763,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17191272974014282,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22308169305324554,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030772378202527763,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030772378202527763,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11961922645568848,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1564598023891449,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021411839872598646,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021411839872598646,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07283841371536255,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09265869408845902,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013038075994700193,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013038075994700193,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17191272974014282,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22308169305324554,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030772378202527763,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030772378202527763,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008390486426651477,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010463342070579529,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010488108033314346,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010488108033314346,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3035030156088047,
|
|
"calibration/batch_distribution_entropy": 0.9494077028090173,
|
|
"calibration/buffer_distribution_entropy": 0.9476631169760029,
|
|
"calibration/confidence_entropy": 0.44092445270135744,
|
|
"calibration/coverage@0%": 0.006259937622309197,
|
|
"calibration/coverage@1%": 0.006259937622309197,
|
|
"calibration/coverage@10%": 0.026572437622309198,
|
|
"calibration/coverage@15%": 0.1161295254403131,
|
|
"calibration/coverage@20%": 0.3525486179060665,
|
|
"calibration/coverage@25%": 0.4424389982876712,
|
|
"calibration/coverage@30%": 0.5639516572896281,
|
|
"calibration/coverage@5%": 0.006259937622309197,
|
|
"calibration/ece": 0.12671306244307387,
|
|
"calibration/mean_confidence": 0.5430596285708069,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1117.8,
|
|
"completions/max_terminated_length": 479.6,
|
|
"completions/mean_length": 163.9982421875,
|
|
"completions/mean_terminated_length": 163.59661254882812,
|
|
"completions/min_length": 77.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0009285790147259831,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 485553179.0,
|
|
"reward": 0.9942840814590455,
|
|
"reward_std": 0.07983254492282868,
|
|
"rewards/accuracy_reward": 0.5244140625,
|
|
"rewards/brier_reward": 0.7915266156196594,
|
|
"rewards/confidence_uniqueness_reward": 0.9534668684005737,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0026315408293157818,
|
|
"rewards/frontier_coverage_1": 0.14171417951583862,
|
|
"rewards/frontier_coverage_10": 0.14171417951583862,
|
|
"rewards/frontier_coverage_15": 0.14171417951583862,
|
|
"rewards/frontier_coverage_20": 0.10749387815594673,
|
|
"rewards/frontier_coverage_25": 0.06779449284076691,
|
|
"rewards/frontier_coverage_5": 0.14171417951583862,
|
|
"rewards/frontier_ece_reward": 0.006896446458995342,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0907958984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1230411022901535,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04539794921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04539794921875,
|
|
"signal/advantage_abs_mean": 0.05986447930335999,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05986447930335999,
|
|
"signal/advantage_pre_scale_std": 0.10530668199062347,
|
|
"signal/advantage_std": 0.10530668199062347,
|
|
"signal/brier_reward/centered_abs_mean": 0.14057967960834503,
|
|
"signal/brier_reward/group_std_mean": 0.18116458952426912,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01757245995104313,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01757245995104313,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022300581261515618,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.029358403012156486,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027875726576894523,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027875726576894523,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002173250797204673,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035278352443128824,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.890118823619559e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.890118823619559e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16866520643234253,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2227775514125824,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030191069934517147,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030191069934517147,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16866520643234253,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2227775514125824,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030191069934517147,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030191069934517147,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16866520643234253,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2227775514125824,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030191069934517147,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030191069934517147,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11727796494960785,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15594127774238586,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020992755657061935,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020992755657061935,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07200475186109542,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09224026650190353,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012888850411400198,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012888850411400198,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16866520643234253,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2227775514125824,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030191069934517147,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030191069934517147,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00801121462136507,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010085698775947094,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010014018276706337,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010014018276706337,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2490675060818095,
|
|
"calibration/batch_distribution_entropy": 0.9173210190726669,
|
|
"calibration/buffer_distribution_entropy": 0.9421114116879643,
|
|
"calibration/confidence_entropy": 0.4066536328827809,
|
|
"calibration/coverage@0%": 0.00859375,
|
|
"calibration/coverage@1%": 0.00859375,
|
|
"calibration/coverage@10%": 0.02890625,
|
|
"calibration/coverage@15%": 0.2320450097847358,
|
|
"calibration/coverage@20%": 0.3984833659491194,
|
|
"calibration/coverage@25%": 0.5518025318003914,
|
|
"calibration/coverage@30%": 0.7471654843444228,
|
|
"calibration/coverage@5%": 0.00859375,
|
|
"calibration/ece": 0.12770797905692954,
|
|
"calibration/mean_confidence": 0.5467555666867103,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 863.6,
|
|
"completions/max_terminated_length": 481.4,
|
|
"completions/mean_length": 159.551953125,
|
|
"completions/mean_terminated_length": 159.2826934814453,
|
|
"completions/min_length": 80.8,
|
|
"completions/min_terminated_length": 80.8,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.0010119550861418247,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 502235023.0,
|
|
"reward": 1.018705701828003,
|
|
"reward_std": 0.08774305582046509,
|
|
"rewards/accuracy_reward": 0.5771484375,
|
|
"rewards/brier_reward": 0.7982451438903808,
|
|
"rewards/confidence_uniqueness_reward": 0.9457098007202148,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002013521012850106,
|
|
"rewards/frontier_coverage_1": 0.11858767569065094,
|
|
"rewards/frontier_coverage_10": 0.11858767569065094,
|
|
"rewards/frontier_coverage_15": 0.11858767569065094,
|
|
"rewards/frontier_coverage_20": 0.0924240618944168,
|
|
"rewards/frontier_coverage_25": 0.06828025579452515,
|
|
"rewards/frontier_coverage_5": 0.11858767569065094,
|
|
"rewards/frontier_ece_reward": 0.007226689532399177,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1236328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1596635937690735,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06181640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06181640625,
|
|
"signal/advantage_abs_mean": 0.066917584836483,
|
|
"signal/advantage_pre_scale_abs_mean": 0.066917584836483,
|
|
"signal/advantage_pre_scale_std": 0.1149211123585701,
|
|
"signal/advantage_std": 0.1149211123585701,
|
|
"signal/brier_reward/centered_abs_mean": 0.1455012708902359,
|
|
"signal/brier_reward/group_std_mean": 0.18579219579696654,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018187658861279488,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018187658861279488,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02829902097582817,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.036865927278995514,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003537377621978521,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003537377621978521,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018063589930534362,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002892591571435332,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.233382631151471e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.233382631151471e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19047014713287352,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24632398784160614,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00340941553004086,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00340941553004086,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19047014713287352,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24632398784160614,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00340941553004086,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00340941553004086,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19047014713287352,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24632398784160614,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00340941553004086,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00340941553004086,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1285821259021759,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16771571040153505,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023016199003905056,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023016199003905056,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07739093005657197,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09800889045000076,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013852976029738785,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013852976029738785,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19047014713287352,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24632398784160614,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00340941553004086,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00340941553004086,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008578121662139893,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010665779560804367,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010722652077674866,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010722652077674866,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.4865714451779266,
|
|
"eval_calibration/batch_distribution_entropy": 0.8811929958426155,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9384680529539475,
|
|
"eval_calibration/confidence_entropy": 0.3955379708976029,
|
|
"eval_calibration/coverage@0%": 0.0234375,
|
|
"eval_calibration/coverage@1%": 0.0234375,
|
|
"eval_calibration/coverage@10%": 0.0234375,
|
|
"eval_calibration/coverage@15%": 0.0234375,
|
|
"eval_calibration/coverage@20%": 0.09375,
|
|
"eval_calibration/coverage@25%": 0.109375,
|
|
"eval_calibration/coverage@30%": 0.109375,
|
|
"eval_calibration/coverage@5%": 0.0234375,
|
|
"eval_calibration/ece": 0.2584375,
|
|
"eval_calibration/mean_confidence": 0.5084374999999999,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 287.25,
|
|
"eval_completions/max_terminated_length": 287.25,
|
|
"eval_completions/mean_length": 160.2589569091797,
|
|
"eval_completions/mean_terminated_length": 160.2589569091797,
|
|
"eval_completions/min_length": 91.5,
|
|
"eval_completions/min_terminated_length": 91.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 502235023.0,
|
|
"eval_reward": 0.936515599489212,
|
|
"eval_reward_std": 0.2348359413444996,
|
|
"eval_rewards/accuracy_reward": 0.4140625,
|
|
"eval_rewards/brier_reward": 0.7844546884298325,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.889892578125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0032348172389902174,
|
|
"eval_rewards/frontier_coverage_1": 0.2147538997232914,
|
|
"eval_rewards/frontier_coverage_10": 0.2147538997232914,
|
|
"eval_rewards/frontier_coverage_15": 0.2147538997232914,
|
|
"eval_rewards/frontier_coverage_20": 0.140639740973711,
|
|
"eval_rewards/frontier_coverage_25": 0.07195262983441353,
|
|
"eval_rewards/frontier_coverage_5": 0.2147538997232914,
|
|
"eval_rewards/frontier_ece_reward": 0.008536459412425756,
|
|
"eval_runtime": 17.4381,
|
|
"eval_samples_per_second": 28.673,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.469482421875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4919169917702675,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2347412109375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2347412109375,
|
|
"eval_signal/advantage_abs_mean": 0.2155938372015953,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2155938372015953,
|
|
"eval_signal/advantage_pre_scale_std": 0.23217838630080223,
|
|
"eval_signal/advantage_std": 0.23217838630080223,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.23349540308117867,
|
|
"eval_signal/brier_reward/group_std_mean": 0.28603896498680115,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029186925385147333,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.029186925385147333,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0508575439453125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.061956305988132954,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063571929931640625,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063571929931640625,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0038491138839162886,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007427805452607572,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.889913493068889e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.889913493068889e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3643998056650162,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.44875599443912506,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006522756069898605,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006522756069898605,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3643998056650162,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.44875599443912506,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006522756069898605,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006522756069898605,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3643998056650162,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.44875599443912506,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006522756069898605,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006522756069898605,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2309923656284809,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.28767409920692444,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004134763090405613,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004134763090405613,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.11495377495884895,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.14584658294916153,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020576725364662707,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020576725364662707,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3643998056650162,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.44875599443912506,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006522756069898605,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006522756069898605,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.012437232304364443,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.014940991066396236,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015546540380455554,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015546540380455554,
|
|
"eval_steps_per_second": 0.229,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"step": 150,
|
|
"train_probe_calibration/aurc": 0.19899992017321977,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.891369256496254,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.9381455575303994,
|
|
"train_probe_calibration/confidence_entropy": 0.44171598086053,
|
|
"train_probe_calibration/coverage@0%": 0.140625,
|
|
"train_probe_calibration/coverage@1%": 0.140625,
|
|
"train_probe_calibration/coverage@10%": 0.3203125,
|
|
"train_probe_calibration/coverage@15%": 0.46875,
|
|
"train_probe_calibration/coverage@20%": 0.671875,
|
|
"train_probe_calibration/coverage@25%": 0.7734375,
|
|
"train_probe_calibration/coverage@30%": 0.8359375,
|
|
"train_probe_calibration/coverage@5%": 0.140625,
|
|
"train_probe_calibration/ece": 0.162015625,
|
|
"train_probe_calibration/mean_confidence": 0.5688437500000001,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 285.5,
|
|
"train_probe_completions/max_terminated_length": 285.5,
|
|
"train_probe_completions/mean_length": 157.96134185791016,
|
|
"train_probe_completions/mean_terminated_length": 157.96134185791016,
|
|
"train_probe_completions/min_length": 84.75,
|
|
"train_probe_completions/min_terminated_length": 84.75,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 502235023.0,
|
|
"train_probe_reward": 1.041875422000885,
|
|
"train_probe_reward_std": 0.22206757217645645,
|
|
"train_probe_rewards/accuracy_reward": 0.634765625,
|
|
"train_probe_rewards/brier_reward": 0.8183042258024216,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.892333984375,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0014572142390534282,
|
|
"train_probe_rewards/frontier_coverage_1": 0.10037144646048546,
|
|
"train_probe_rewards/frontier_coverage_10": 0.10037144646048546,
|
|
"train_probe_rewards/frontier_coverage_15": 0.10037144646048546,
|
|
"train_probe_rewards/frontier_coverage_20": 0.07581588346511126,
|
|
"train_probe_rewards/frontier_coverage_25": 0.06856801547110081,
|
|
"train_probe_rewards/frontier_coverage_5": 0.10037144646048546,
|
|
"train_probe_rewards/frontier_ece_reward": 0.007343103410676122,
|
|
"train_probe_runtime": 17.3511,
|
|
"train_probe_samples_per_second": 28.817,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4532470703125,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.4832051396369934,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22662353515625,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22662353515625,
|
|
"train_probe_signal/advantage_abs_mean": 0.20156240463256836,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20156240463256836,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.21973057836294174,
|
|
"train_probe_signal/advantage_std": 0.21973057836294174,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.20465417951345444,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.2636885643005371,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025581772439181805,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.025581772439181805,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045745849609375,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05602440424263477,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005718231201171875,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005718231201171875,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002107554581016302,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.003866996383294463,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.772522450162796e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.772522450162796e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.35481464117765427,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.46584365516901016,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006351181888021529,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006351181888021529,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.35481464117765427,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.46584365516901016,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006351181888021529,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006351181888021529,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.35481464117765427,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.46584365516901016,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006351181888021529,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006351181888021529,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.22063589468598366,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.298631876707077,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003949382517021149,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003949382517021149,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.11186387576162815,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.14589739218354225,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020023633260279894,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020023633260279894,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.35481464117765427,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.46584365516901016,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006351181888021529,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006351181888021529,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.01217859354801476,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.015122672310099006,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001522324193501845,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001522324193501845,
|
|
"train_probe_steps_per_second": 0.231
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29528651859816535,
|
|
"calibration/batch_distribution_entropy": 0.921602674569398,
|
|
"calibration/buffer_distribution_entropy": 0.9364619815831505,
|
|
"calibration/confidence_entropy": 0.40908701768177347,
|
|
"calibration/coverage@0%": 0.009765625,
|
|
"calibration/coverage@1%": 0.009765625,
|
|
"calibration/coverage@10%": 0.14453125,
|
|
"calibration/coverage@15%": 0.226171875,
|
|
"calibration/coverage@20%": 0.346875,
|
|
"calibration/coverage@25%": 0.4140625,
|
|
"calibration/coverage@30%": 0.4953125,
|
|
"calibration/coverage@5%": 0.1,
|
|
"calibration/ece": 0.11914970568073154,
|
|
"calibration/mean_confidence": 0.5459722226119128,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 646.6,
|
|
"completions/max_terminated_length": 427.0,
|
|
"completions/mean_length": 160.25498046875,
|
|
"completions/mean_terminated_length": 160.12051391601562,
|
|
"completions/min_length": 77.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0011867131106555462,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 519183874.0,
|
|
"reward": 1.0365911722183228,
|
|
"reward_std": 0.07778663039207459,
|
|
"rewards/accuracy_reward": 0.61103515625,
|
|
"rewards/brier_reward": 0.8111203789710999,
|
|
"rewards/confidence_uniqueness_reward": 0.9524305701255799,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0018819471122696995,
|
|
"rewards/frontier_coverage_1": 0.10418144315481186,
|
|
"rewards/frontier_coverage_10": 0.10418144315481186,
|
|
"rewards/frontier_coverage_15": 0.10418144315481186,
|
|
"rewards/frontier_coverage_20": 0.07317600697278977,
|
|
"rewards/frontier_coverage_25": 0.06316131204366685,
|
|
"rewards/frontier_coverage_5": 0.10418144315481186,
|
|
"rewards/frontier_ece_reward": 0.006889772973954678,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.093475341796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1266740679740906,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0467376708984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0467376708984375,
|
|
"signal/advantage_abs_mean": 0.058039630949497226,
|
|
"signal/advantage_pre_scale_abs_mean": 0.058039630949497226,
|
|
"signal/advantage_pre_scale_std": 0.10540584474802017,
|
|
"signal/advantage_std": 0.10540584474802017,
|
|
"signal/brier_reward/centered_abs_mean": 0.13347503244876863,
|
|
"signal/brier_reward/group_std_mean": 0.17193097174167632,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01668437905609608,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01668437905609608,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0233658567070961,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030634360387921333,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029207320883870127,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029207320883870127,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018023386830464006,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031401820946484805,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.226186199754011e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.226186199754011e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1628864049911499,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21382062137126923,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002915666624903679,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002915666624903679,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1628864049911499,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21382062137126923,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002915666624903679,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002915666624903679,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1628864049911499,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21382062137126923,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002915666624903679,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002915666624903679,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10325838029384612,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.136458557844162,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018483249470591546,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018483249470591546,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0682972326874733,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08658059686422348,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012225204147398472,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012225204147398472,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1628864049911499,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21382062137126923,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002915666624903679,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002915666624903679,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007767515070736408,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00975795928388834,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000970939383842051,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000970939383842051,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25764350837523864,
|
|
"calibration/batch_distribution_entropy": 0.9448658648442981,
|
|
"calibration/buffer_distribution_entropy": 0.934086791389927,
|
|
"calibration/confidence_entropy": 0.4335419018545725,
|
|
"calibration/coverage@0%": 0.008993548189823874,
|
|
"calibration/coverage@1%": 0.008993548189823874,
|
|
"calibration/coverage@10%": 0.23697330601761254,
|
|
"calibration/coverage@15%": 0.32298419153620356,
|
|
"calibration/coverage@20%": 0.3902037977005871,
|
|
"calibration/coverage@25%": 0.518359375,
|
|
"calibration/coverage@30%": 0.597265625,
|
|
"calibration/coverage@5%": 0.11814380503913893,
|
|
"calibration/ece": 0.14449420275244873,
|
|
"calibration/mean_confidence": 0.5363735101426749,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 387.6,
|
|
"completions/max_terminated_length": 387.6,
|
|
"completions/mean_length": 158.7716796875,
|
|
"completions/mean_terminated_length": 158.7716796875,
|
|
"completions/min_length": 81.2,
|
|
"completions/min_terminated_length": 81.2,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.0010637511732056737,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 535955360.0,
|
|
"reward": 1.0332459449768066,
|
|
"reward_std": 0.07940305918455123,
|
|
"rewards/accuracy_reward": 0.6001953125,
|
|
"rewards/brier_reward": 0.8189030528068543,
|
|
"rewards/confidence_uniqueness_reward": 0.9518810272216797,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.001749542192555964,
|
|
"rewards/frontier_coverage_1": 0.11576533690094948,
|
|
"rewards/frontier_coverage_10": 0.11576533690094948,
|
|
"rewards/frontier_coverage_15": 0.11576533690094948,
|
|
"rewards/frontier_coverage_20": 0.08045043498277664,
|
|
"rewards/frontier_coverage_25": 0.07256748080253601,
|
|
"rewards/frontier_coverage_5": 0.11576533690094948,
|
|
"rewards/frontier_ece_reward": 0.007211483735591173,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0985107421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13301554769277574,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04925537109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04925537109375,
|
|
"signal/advantage_abs_mean": 0.05924607962369919,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05924607962369919,
|
|
"signal/advantage_pre_scale_std": 0.10775545537471772,
|
|
"signal/advantage_std": 0.10775545537471772,
|
|
"signal/brier_reward/centered_abs_mean": 0.12845354974269868,
|
|
"signal/brier_reward/group_std_mean": 0.1667891651391983,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016056693717837335,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016056693717837335,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02351841777563095,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030801539495587348,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002939802221953869,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002939802221953869,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001706664077937603,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029680487932637334,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.054928674828261e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.054928674828261e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16018467545509338,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21153274178504944,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028673056978732346,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028673056978732346,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16018467545509338,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21153274178504944,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028673056978732346,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028673056978732346,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16018467545509338,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21153274178504944,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028673056978732346,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028673056978732346,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09345034509897232,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12456403076648712,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00167276116553694,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00167276116553694,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06614456176757813,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08370408713817597,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011839876184239983,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011839876184239983,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16018467545509338,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21153274178504944,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028673056978732346,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028673056978732346,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007462390977889299,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009389066137373447,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009327988722361624,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009327988722361624,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1528418915395085,
|
|
"calibration/batch_distribution_entropy": 0.9146828022473686,
|
|
"calibration/buffer_distribution_entropy": 0.9326836952910534,
|
|
"calibration/confidence_entropy": 0.39974539369611933,
|
|
"calibration/coverage@0%": 0.02421875,
|
|
"calibration/coverage@1%": 0.02421875,
|
|
"calibration/coverage@10%": 0.445703125,
|
|
"calibration/coverage@15%": 0.590234375,
|
|
"calibration/coverage@20%": 0.705078125,
|
|
"calibration/coverage@25%": 0.8046875,
|
|
"calibration/coverage@30%": 0.893359375,
|
|
"calibration/coverage@5%": 0.165234375,
|
|
"calibration/ece": 0.1016723354137224,
|
|
"calibration/mean_confidence": 0.5691894614612776,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 658.6,
|
|
"completions/max_terminated_length": 433.4,
|
|
"completions/mean_length": 157.12998046875,
|
|
"completions/mean_terminated_length": 156.9954620361328,
|
|
"completions/min_length": 78.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0010418170131742954,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 552593907.0,
|
|
"reward": 1.0281262636184691,
|
|
"reward_std": 0.0788488432765007,
|
|
"rewards/accuracy_reward": 0.5865234375,
|
|
"rewards/brier_reward": 0.821136748790741,
|
|
"rewards/confidence_uniqueness_reward": 0.9473978161811829,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0018390015000477433,
|
|
"rewards/frontier_coverage_1": 0.13756178915500641,
|
|
"rewards/frontier_coverage_10": 0.13756178915500641,
|
|
"rewards/frontier_coverage_15": 0.13756178915500641,
|
|
"rewards/frontier_coverage_20": 0.09277231395244598,
|
|
"rewards/frontier_coverage_25": 0.08103752583265304,
|
|
"rewards/frontier_coverage_5": 0.13756178915500641,
|
|
"rewards/frontier_ece_reward": 0.007350740581750869,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10626220703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13978690654039383,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053131103515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.053131103515625,
|
|
"signal/advantage_abs_mean": 0.060259700566530225,
|
|
"signal/advantage_pre_scale_abs_mean": 0.060259700566530225,
|
|
"signal/advantage_pre_scale_std": 0.10816312432289124,
|
|
"signal/advantage_std": 0.10816312432289124,
|
|
"signal/brier_reward/centered_abs_mean": 0.13122203350067138,
|
|
"signal/brier_reward/group_std_mean": 0.16875889003276826,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016402754187583923,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016402754187583923,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025891555473208427,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0340937253087759,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032364444341510534,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032364444341510534,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017435794696211814,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029448256362229587,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1210070665110835e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1210070665110835e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17309306263923646,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22656363546848296,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003098365804180503,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003098365804180503,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17309306263923646,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22656363546848296,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003098365804180503,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003098365804180503,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17309306263923646,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22656363546848296,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003098365804180503,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003098365804180503,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10015229880809784,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13174699544906615,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017927261302247643,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017927261302247643,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06691559106111526,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08470007181167602,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011977890972048044,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011977890972048044,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17309306263923646,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22656363546848296,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003098365804180503,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003098365804180503,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0074427520856261255,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009379717521369457,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009303440107032657,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009303440107032657,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17694588682690862,
|
|
"calibration/batch_distribution_entropy": 0.8658933533472686,
|
|
"calibration/buffer_distribution_entropy": 0.9297515304240533,
|
|
"calibration/confidence_entropy": 0.3808740605876334,
|
|
"calibration/coverage@0%": 0.019140625,
|
|
"calibration/coverage@1%": 0.019140625,
|
|
"calibration/coverage@10%": 0.329296875,
|
|
"calibration/coverage@15%": 0.53828125,
|
|
"calibration/coverage@20%": 0.686328125,
|
|
"calibration/coverage@25%": 0.770703125,
|
|
"calibration/coverage@30%": 0.8421875,
|
|
"calibration/coverage@5%": 0.064453125,
|
|
"calibration/ece": 0.07548029076484816,
|
|
"calibration/mean_confidence": 0.6028442686101517,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 876.4,
|
|
"completions/max_terminated_length": 432.8,
|
|
"completions/mean_length": 158.95498046875,
|
|
"completions/mean_terminated_length": 158.68636169433594,
|
|
"completions/min_length": 79.0,
|
|
"completions/min_terminated_length": 79.0,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.0010623829439282417,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 569385190.0,
|
|
"reward": 1.0418287992477417,
|
|
"reward_std": 0.08479072451591492,
|
|
"rewards/accuracy_reward": 0.622265625,
|
|
"rewards/brier_reward": 0.8150393009185791,
|
|
"rewards/confidence_uniqueness_reward": 0.9497189283370971,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002168457116931677,
|
|
"rewards/frontier_coverage_1": 0.09459788501262664,
|
|
"rewards/frontier_coverage_10": 0.09459788501262664,
|
|
"rewards/frontier_coverage_15": 0.09174881279468536,
|
|
"rewards/frontier_coverage_20": 0.06693983972072601,
|
|
"rewards/frontier_coverage_25": 0.08642307072877883,
|
|
"rewards/frontier_coverage_5": 0.09459788501262664,
|
|
"rewards/frontier_ece_reward": 0.006162353791296482,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.108935546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14547575116157532,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0544677734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0544677734375,
|
|
"signal/advantage_abs_mean": 0.06390135288238526,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06390135288238526,
|
|
"signal/advantage_pre_scale_std": 0.11458454579114914,
|
|
"signal/advantage_std": 0.11458454579114914,
|
|
"signal/brier_reward/centered_abs_mean": 0.1370469719171524,
|
|
"signal/brier_reward/group_std_mean": 0.1739386260509491,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01713087148964405,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01713087148964405,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025660135224461554,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0340105090290308,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032075169030576943,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032075169030576943,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002310140198096633,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003973044548183679,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.135150738875382e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.135150738875382e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16189839243888854,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21171375811100007,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002897981042042375,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002897981042042375,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16189839243888854,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21171375811100007,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002897981042042375,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002897981042042375,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15325535833835602,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20059145987033844,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027432709001004698,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027432709001004698,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09184687733650207,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12045546323060989,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016440590377897024,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016440590377897024,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07399061620235443,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09224920123815536,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013244319707155228,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013244319707155228,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16189839243888854,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21171375811100007,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002897981042042375,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002897981042042375,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007373248692601919,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009202315472066402,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009216560865752399,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009216560865752399,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20591658683091413,
|
|
"calibration/batch_distribution_entropy": 0.9011136319957318,
|
|
"calibration/buffer_distribution_entropy": 0.9278593868874762,
|
|
"calibration/confidence_entropy": 0.4000411831087075,
|
|
"calibration/coverage@0%": 0.018359375,
|
|
"calibration/coverage@1%": 0.018359375,
|
|
"calibration/coverage@10%": 0.23671875,
|
|
"calibration/coverage@15%": 0.369140625,
|
|
"calibration/coverage@20%": 0.588499113258317,
|
|
"calibration/coverage@25%": 0.7081213307240704,
|
|
"calibration/coverage@30%": 0.7772917685909981,
|
|
"calibration/coverage@5%": 0.1859375,
|
|
"calibration/ece": 0.1062459463639861,
|
|
"calibration/mean_confidence": 0.5850019076191907,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 876.2,
|
|
"completions/max_terminated_length": 401.2,
|
|
"completions/mean_length": 157.99453125,
|
|
"completions/mean_terminated_length": 157.725830078125,
|
|
"completions/min_length": 76.8,
|
|
"completions/min_terminated_length": 76.8,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0010646632872521877,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 585824462.0,
|
|
"reward": 1.0233974695205688,
|
|
"reward_std": 0.07817947417497635,
|
|
"rewards/accuracy_reward": 0.57724609375,
|
|
"rewards/brier_reward": 0.8219172954559326,
|
|
"rewards/confidence_uniqueness_reward": 0.9498418092727661,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0025215481640771032,
|
|
"rewards/frontier_coverage_1": 0.13713131994009017,
|
|
"rewards/frontier_coverage_10": 0.13713131994009017,
|
|
"rewards/frontier_coverage_15": 0.1240748941898346,
|
|
"rewards/frontier_coverage_20": 0.08354234397411346,
|
|
"rewards/frontier_coverage_25": 0.08381873071193695,
|
|
"rewards/frontier_coverage_5": 0.13713131994009017,
|
|
"rewards/frontier_ece_reward": 0.006933646369725465,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.086468505859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1215772956609726,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0432342529296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0432342529296875,
|
|
"signal/advantage_abs_mean": 0.05740503966808319,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05740503966808319,
|
|
"signal/advantage_pre_scale_std": 0.10665770769119262,
|
|
"signal/advantage_std": 0.10665770769119262,
|
|
"signal/brier_reward/centered_abs_mean": 0.12967448830604553,
|
|
"signal/brier_reward/group_std_mean": 0.17017283141613007,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01620931103825569,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01620931103825569,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025316498056054116,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.033938854560256006,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031645622570067645,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031645622570067645,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025827214121818542,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004412530735135078,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6230711450334636e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6230711450334636e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14643085598945618,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19515539705753326,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026211123913526535,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026211123913526535,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14643085598945618,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19515539705753326,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026211123913526535,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026211123913526535,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12996700257062913,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17408455312252044,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002326409285888076,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002326409285888076,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07923403531312942,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10590324848890305,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014182891929522157,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014182891929522157,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07106765508651733,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09031975120306016,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012721109902486204,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012721109902486204,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14643085598945618,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19515539705753326,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026211123913526535,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026211123913526535,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006462567299604416,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008214760478585959,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000807820912450552,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000807820912450552,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23616409851066922,
|
|
"calibration/batch_distribution_entropy": 0.9144159502843531,
|
|
"calibration/buffer_distribution_entropy": 0.9289005260583117,
|
|
"calibration/confidence_entropy": 0.4101810968351833,
|
|
"calibration/coverage@0%": 0.00859375,
|
|
"calibration/coverage@1%": 0.00859375,
|
|
"calibration/coverage@10%": 0.22890625,
|
|
"calibration/coverage@15%": 0.34106158088235294,
|
|
"calibration/coverage@20%": 0.46107689950980396,
|
|
"calibration/coverage@25%": 0.6399234068627451,
|
|
"calibration/coverage@30%": 0.7298238357843138,
|
|
"calibration/coverage@5%": 0.0875,
|
|
"calibration/ece": 0.10061718255943135,
|
|
"calibration/mean_confidence": 0.5690684021730142,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1069.4,
|
|
"completions/max_terminated_length": 392.8,
|
|
"completions/mean_length": 156.8580078125,
|
|
"completions/mean_terminated_length": 156.31970825195313,
|
|
"completions/min_length": 78.6,
|
|
"completions/min_terminated_length": 78.6,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0010169014567509294,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 602617312.0,
|
|
"reward": 1.0232046604156495,
|
|
"reward_std": 0.07158796712756157,
|
|
"rewards/accuracy_reward": 0.5810546875,
|
|
"rewards/brier_reward": 0.8135895252227783,
|
|
"rewards/confidence_uniqueness_reward": 0.947513747215271,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0024039767682552337,
|
|
"rewards/frontier_coverage_1": 0.12831918448209761,
|
|
"rewards/frontier_coverage_10": 0.12831918448209761,
|
|
"rewards/frontier_coverage_15": 0.12189059555530549,
|
|
"rewards/frontier_coverage_20": 0.08235756382346153,
|
|
"rewards/frontier_coverage_25": 0.08532513380050659,
|
|
"rewards/frontier_coverage_5": 0.12831918448209761,
|
|
"rewards/frontier_ece_reward": 0.006019887700676918,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076611328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10829295367002487,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0383056640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0383056640625,
|
|
"signal/advantage_abs_mean": 0.052008964121341705,
|
|
"signal/advantage_pre_scale_abs_mean": 0.052008964121341705,
|
|
"signal/advantage_pre_scale_std": 0.09942405074834823,
|
|
"signal/advantage_std": 0.09942405074834823,
|
|
"signal/brier_reward/centered_abs_mean": 0.12525332272052764,
|
|
"signal/brier_reward/group_std_mean": 0.16305108666419982,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015656665340065955,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015656665340065955,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026879063248634337,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.036187725886702535,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003359882906079292,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003359882906079292,
|
|
"signal/format_reward/centered_abs_mean": 0.000933837890625,
|
|
"signal/format_reward/group_std_mean": 0.0024258273653686045,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002302885288372636,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004087219154462219,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.122164536966011e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.122164536966011e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14966692626476288,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19450730979442596,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002679037814959884,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002679037814959884,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14966692626476288,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19450730979442596,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002679037814959884,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002679037814959884,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13418073505163192,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17452655732631683,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002401835098862648,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002401835098862648,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08138690441846848,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1053778126835823,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014568255050107838,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014568255050107838,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06989559829235077,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08840005397796631,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001251131179742515,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001251131179742515,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14966692626476288,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19450730979442596,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002679037814959884,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002679037814959884,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006055058259516954,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007611721567809582,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007568822824396193,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007568822824396193,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25049247038311473,
|
|
"calibration/batch_distribution_entropy": 0.9216749923978981,
|
|
"calibration/buffer_distribution_entropy": 0.928906031519291,
|
|
"calibration/confidence_entropy": 0.4038246399408652,
|
|
"calibration/coverage@0%": 0.025390625,
|
|
"calibration/coverage@1%": 0.025390625,
|
|
"calibration/coverage@10%": 0.19765625,
|
|
"calibration/coverage@15%": 0.36015625,
|
|
"calibration/coverage@20%": 0.5173961900684931,
|
|
"calibration/coverage@25%": 0.6072766328277887,
|
|
"calibration/coverage@30%": 0.6674695755870841,
|
|
"calibration/coverage@5%": 0.10234375,
|
|
"calibration/ece": 0.1129630310966048,
|
|
"calibration/mean_confidence": 0.5487327841219951,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 598.2,
|
|
"completions/max_terminated_length": 358.6,
|
|
"completions/mean_length": 155.59599609375,
|
|
"completions/mean_terminated_length": 155.46160583496095,
|
|
"completions/min_length": 74.0,
|
|
"completions/min_terminated_length": 74.0,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0011274260468780994,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 619378327.0,
|
|
"reward": 1.0228557944297791,
|
|
"reward_std": 0.07906165421009063,
|
|
"rewards/accuracy_reward": 0.5802734375,
|
|
"rewards/brier_reward": 0.8147315979003906,
|
|
"rewards/confidence_uniqueness_reward": 0.9459418058395386,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0021479753311723472,
|
|
"rewards/frontier_coverage_1": 0.1302879810333252,
|
|
"rewards/frontier_coverage_10": 0.1302879810333252,
|
|
"rewards/frontier_coverage_15": 0.1160609021782875,
|
|
"rewards/frontier_coverage_20": 0.0786726415157318,
|
|
"rewards/frontier_coverage_25": 0.08592544496059418,
|
|
"rewards/frontier_coverage_5": 0.1302879810333252,
|
|
"rewards/frontier_ece_reward": 0.006006046012043953,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1034423828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13525836020708085,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05172119140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05172119140625,
|
|
"signal/advantage_abs_mean": 0.0604724645614624,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0604724645614624,
|
|
"signal/advantage_pre_scale_std": 0.10902182012796402,
|
|
"signal/advantage_std": 0.10902182012796402,
|
|
"signal/brier_reward/centered_abs_mean": 0.12949165105819702,
|
|
"signal/brier_reward/group_std_mean": 0.16734184324741364,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016186456382274627,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016186456382274627,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027432877197861673,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03624261319637299,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003429109649732709,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003429109649732709,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021959642181172967,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003895052522420883,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.930775710614398e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.930775710614398e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16536442041397095,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21532918214797975,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029600230976939202,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029600230976939202,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16536442041397095,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21532918214797975,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029600230976939202,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029600230976939202,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14321968853473663,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1867223024368286,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002563632372766733,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002563632372766733,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0868200957775116,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11267746090888978,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015540797030553222,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015540797030553222,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07138665020465851,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0901971310377121,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012778210220858455,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012778210220858455,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16536442041397095,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21532918214797975,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029600230976939202,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029600230976939202,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006090964470058679,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00773719884455204,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007613705587573349,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007613705587573349,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18192419947165778,
|
|
"calibration/batch_distribution_entropy": 0.8738511825598287,
|
|
"calibration/buffer_distribution_entropy": 0.9289629376118806,
|
|
"calibration/confidence_entropy": 0.3687290534199558,
|
|
"calibration/coverage@0%": 0.10390625,
|
|
"calibration/coverage@1%": 0.1578125,
|
|
"calibration/coverage@10%": 0.44656846257338556,
|
|
"calibration/coverage@15%": 0.5469736117906067,
|
|
"calibration/coverage@20%": 0.6266771648727985,
|
|
"calibration/coverage@25%": 0.7106829439823874,
|
|
"calibration/coverage@30%": 0.7673380931996086,
|
|
"calibration/coverage@5%": 0.3422218994618395,
|
|
"calibration/ece": 0.10703189610363692,
|
|
"calibration/mean_confidence": 0.5199164274477053,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 609.0,
|
|
"completions/max_terminated_length": 381.8,
|
|
"completions/mean_length": 156.66416015625,
|
|
"completions/mean_terminated_length": 156.52974548339844,
|
|
"completions/min_length": 75.8,
|
|
"completions/min_terminated_length": 75.8,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0009081127354875207,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 635982056.0,
|
|
"reward": 1.025065505504608,
|
|
"reward_std": 0.06199713125824928,
|
|
"rewards/accuracy_reward": 0.571484375,
|
|
"rewards/brier_reward": 0.8416186571121216,
|
|
"rewards/confidence_uniqueness_reward": 0.9414823293685913,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.001796682784333825,
|
|
"rewards/frontier_coverage_1": 0.17411440312862397,
|
|
"rewards/frontier_coverage_10": 0.17411440312862397,
|
|
"rewards/frontier_coverage_15": 0.14766598343849183,
|
|
"rewards/frontier_coverage_20": 0.10131891369819641,
|
|
"rewards/frontier_coverage_25": 0.10434879511594772,
|
|
"rewards/frontier_coverage_5": 0.17411440312862397,
|
|
"rewards/frontier_ece_reward": 0.006736797094345093,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0841552734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11044697016477585,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04207763671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04207763671875,
|
|
"signal/advantage_abs_mean": 0.04641749858856201,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04641749858856201,
|
|
"signal/advantage_pre_scale_std": 0.09046411365270615,
|
|
"signal/advantage_std": 0.09046411365270615,
|
|
"signal/brier_reward/centered_abs_mean": 0.11504580080509186,
|
|
"signal/brier_reward/group_std_mean": 0.14834731221199035,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014380725100636482,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014380725100636482,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0289421908557415,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03680059537291527,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036177738569676877,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036177738569676877,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018506290158256888,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032447043806314467,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.31262570398394e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.31262570398394e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16321699619293212,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2083958327770233,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029215840622782707,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029215840622782707,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16321699619293212,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2083958327770233,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029215840622782707,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029215840622782707,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13163567185401917,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16858604550361633,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023562783375382424,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023562783375382424,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08355329185724258,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10619560033082961,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014956038678064943,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014956038678064943,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06687505841255188,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08423561900854111,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011970635503530502,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011970635503530502,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16321699619293212,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2083958327770233,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029215840622782707,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029215840622782707,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005484546534717083,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006870439555495977,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006855683168396354,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006855683168396354,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1971963236746678,
|
|
"calibration/batch_distribution_entropy": 0.9205140143105222,
|
|
"calibration/buffer_distribution_entropy": 0.9295399279070912,
|
|
"calibration/confidence_entropy": 0.40455422987883516,
|
|
"calibration/coverage@0%": 0.0234375,
|
|
"calibration/coverage@1%": 0.061328125,
|
|
"calibration/coverage@10%": 0.33671875,
|
|
"calibration/coverage@15%": 0.4609375,
|
|
"calibration/coverage@20%": 0.562890625,
|
|
"calibration/coverage@25%": 0.635546875,
|
|
"calibration/coverage@30%": 0.78515625,
|
|
"calibration/coverage@5%": 0.2,
|
|
"calibration/ece": 0.09562349489051207,
|
|
"calibration/mean_confidence": 0.524824910104146,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 385.6,
|
|
"completions/max_terminated_length": 385.6,
|
|
"completions/mean_length": 158.17109375,
|
|
"completions/mean_terminated_length": 158.17109375,
|
|
"completions/min_length": 75.4,
|
|
"completions/min_terminated_length": 75.4,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.001079601002857089,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 652945632.0,
|
|
"reward": 1.0264529228210448,
|
|
"reward_std": 0.0716327577829361,
|
|
"rewards/accuracy_reward": 0.57841796875,
|
|
"rewards/brier_reward": 0.8322904944419861,
|
|
"rewards/confidence_uniqueness_reward": 0.9480019688606263,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.00190048823133111,
|
|
"rewards/frontier_coverage_1": 0.15654837489128112,
|
|
"rewards/frontier_coverage_10": 0.15654837489128112,
|
|
"rewards/frontier_coverage_15": 0.13024692088365555,
|
|
"rewards/frontier_coverage_20": 0.09054728597402573,
|
|
"rewards/frontier_coverage_25": 0.09681654870510101,
|
|
"rewards/frontier_coverage_5": 0.15654837489128112,
|
|
"rewards/frontier_ece_reward": 0.005586811527609825,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.097406005859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12599362283945084,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0487030029296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0487030029296875,
|
|
"signal/advantage_abs_mean": 0.055555340647697446,
|
|
"signal/advantage_pre_scale_abs_mean": 0.055555340647697446,
|
|
"signal/advantage_pre_scale_std": 0.10187341719865799,
|
|
"signal/advantage_std": 0.10187341719865799,
|
|
"signal/brier_reward/centered_abs_mean": 0.12212891280651092,
|
|
"signal/brier_reward/group_std_mean": 0.15707454681396485,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015266114100813865,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015266114100813865,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024161863327026366,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03125019893050194,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003020232915878296,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003020232915878296,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001966876885853708,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003790367441251874,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5207096880185416e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5207096880185416e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.166505765914917,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2163769483566284,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00298045314848423,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00298045314848423,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.166505765914917,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2163769483566284,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00298045314848423,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00298045314848423,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12933739721775056,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16911623477935792,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002315139351412654,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002315139351412654,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0790199413895607,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10347112566232682,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014144569169729948,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014144569169729948,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06764063239097595,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08631972819566727,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012107673101127148,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012107673101127148,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.166505765914917,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2163769483566284,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00298045314848423,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00298045314848423,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005112008564174176,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006569109484553337,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000639001070521772,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000639001070521772,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.195020748165163,
|
|
"calibration/batch_distribution_entropy": 0.8922545357396509,
|
|
"calibration/buffer_distribution_entropy": 0.9305442441617402,
|
|
"calibration/confidence_entropy": 0.3912324496194214,
|
|
"calibration/coverage@0%": 0.044140625,
|
|
"calibration/coverage@1%": 0.044140625,
|
|
"calibration/coverage@10%": 0.32421875,
|
|
"calibration/coverage@15%": 0.473046875,
|
|
"calibration/coverage@20%": 0.556640625,
|
|
"calibration/coverage@25%": 0.665234375,
|
|
"calibration/coverage@30%": 0.76875,
|
|
"calibration/coverage@5%": 0.233984375,
|
|
"calibration/ece": 0.12871549991004977,
|
|
"calibration/mean_confidence": 0.5960780698014887,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 374.2,
|
|
"completions/max_terminated_length": 374.2,
|
|
"completions/mean_length": 161.2595703125,
|
|
"completions/mean_terminated_length": 161.2595703125,
|
|
"completions/min_length": 79.6,
|
|
"completions/min_terminated_length": 79.6,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0008534787921234965,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 669939618.0,
|
|
"reward": 1.0402125120162964,
|
|
"reward_std": 0.0648931972682476,
|
|
"rewards/accuracy_reward": 0.613671875,
|
|
"rewards/brier_reward": 0.8254677057266235,
|
|
"rewards/confidence_uniqueness_reward": 0.946368408203125,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002193172019906342,
|
|
"rewards/frontier_coverage_1": 0.11940560638904571,
|
|
"rewards/frontier_coverage_10": 0.11940560638904571,
|
|
"rewards/frontier_coverage_15": 0.09389316588640213,
|
|
"rewards/frontier_coverage_20": 0.07351961880922317,
|
|
"rewards/frontier_coverage_25": 0.10749737620353698,
|
|
"rewards/frontier_coverage_5": 0.11940560638904571,
|
|
"rewards/frontier_ece_reward": 0.004826861340552569,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07481689453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10642163604497909,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037408447265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037408447265625,
|
|
"signal/advantage_abs_mean": 0.04719259664416313,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04719259664416313,
|
|
"signal/advantage_pre_scale_std": 0.0948547139763832,
|
|
"signal/advantage_std": 0.0948547139763832,
|
|
"signal/brier_reward/centered_abs_mean": 0.11356604993343353,
|
|
"signal/brier_reward/group_std_mean": 0.14705831706523895,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014195756241679191,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014195756241679191,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025693368911743165,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03297973945736885,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032116711139678956,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032116711139678956,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002536199474707246,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004635827429592609,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5397969006444325e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5397969006444325e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13650378882884978,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1806756556034088,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024434176739305258,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024434176739305258,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13650378882884978,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1806756556034088,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024434176739305258,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024434176739305258,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09933190941810607,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13171655982732772,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017780411522835492,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017780411522835492,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06690454185009002,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0870763123035431,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011975912610068917,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011975912610068917,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07027508169412613,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09005680382251739,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012579238740727306,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012579238740727306,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13650378882884978,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1806756556034088,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024434176739305258,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024434176739305258,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004354535695165395,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005637980904430151,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005443169618956744,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005443169618956744,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.46342354183372153,
|
|
"eval_calibration/batch_distribution_entropy": 0.8267386701063087,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9303503713284912,
|
|
"eval_calibration/confidence_entropy": 0.36764638225435886,
|
|
"eval_calibration/coverage@0%": 0.1015625,
|
|
"eval_calibration/coverage@1%": 0.1015625,
|
|
"eval_calibration/coverage@10%": 0.1171875,
|
|
"eval_calibration/coverage@15%": 0.140625,
|
|
"eval_calibration/coverage@20%": 0.1484375,
|
|
"eval_calibration/coverage@25%": 0.2421875,
|
|
"eval_calibration/coverage@30%": 0.3125,
|
|
"eval_calibration/coverage@5%": 0.1015625,
|
|
"eval_calibration/ece": 0.26115843455188675,
|
|
"eval_calibration/mean_confidence": 0.5374084345518868,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 338.5,
|
|
"eval_completions/max_terminated_length": 338.5,
|
|
"eval_completions/mean_length": 165.4894256591797,
|
|
"eval_completions/mean_terminated_length": 165.4894256591797,
|
|
"eval_completions/min_length": 96.0,
|
|
"eval_completions/min_terminated_length": 96.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 669939618.0,
|
|
"eval_reward": 0.9351394772529602,
|
|
"eval_reward_std": 0.24361010268330574,
|
|
"eval_rewards/accuracy_reward": 0.41015625,
|
|
"eval_rewards/brier_reward": 0.7876222133636475,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.893798828125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.005775218247435987,
|
|
"eval_rewards/frontier_coverage_1": 0.24114028364419937,
|
|
"eval_rewards/frontier_coverage_10": 0.24114028364419937,
|
|
"eval_rewards/frontier_coverage_15": 0.1763102523982525,
|
|
"eval_rewards/frontier_coverage_20": 0.10979359783232212,
|
|
"eval_rewards/frontier_coverage_25": 0.06296418234705925,
|
|
"eval_rewards/frontier_coverage_5": 0.24114028364419937,
|
|
"eval_rewards/frontier_ece_reward": 0.006316208629868925,
|
|
"eval_runtime": 18.1608,
|
|
"eval_samples_per_second": 27.532,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.462158203125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4877973794937134,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2310791015625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2310791015625,
|
|
"eval_signal/advantage_abs_mean": 0.22277260944247246,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22277260944247246,
|
|
"eval_signal/advantage_pre_scale_std": 0.24095501005649567,
|
|
"eval_signal/advantage_std": 0.24095501005649567,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.25160689651966095,
|
|
"eval_signal/brier_reward/group_std_mean": 0.3050123006105423,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03145086206495762,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.03145086206495762,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0444183349609375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05229387618601322,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055522918701171875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055522918701171875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.008398046251386404,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.016706117428839207,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00015032502415124327,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00015032502415124327,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3698094040155411,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.43596525490283966,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006619588239118457,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006619588239118457,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3698094040155411,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.43596525490283966,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006619588239118457,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006619588239118457,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2656380385160446,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.31497038900852203,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004754920839332044,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004754920839332044,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.15232503414154053,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.1873607039451599,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002726617909502238,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002726617909502238,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.14092491567134857,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.18245521932840347,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025225559365935624,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025225559365935624,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3698094040155411,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.43596525490283966,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006619588239118457,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006619588239118457,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.008692699484527111,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.010699421167373657,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010865874355658889,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010865874355658889,
|
|
"eval_steps_per_second": 0.22,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"step": 200,
|
|
"train_probe_calibration/aurc": 0.18443769404155097,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.8015818144093043,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.9300096207172028,
|
|
"train_probe_calibration/confidence_entropy": 0.3722767016939027,
|
|
"train_probe_calibration/coverage@0%": 0.2890625,
|
|
"train_probe_calibration/coverage@1%": 0.2890625,
|
|
"train_probe_calibration/coverage@10%": 0.3515625,
|
|
"train_probe_calibration/coverage@15%": 0.5078125,
|
|
"train_probe_calibration/coverage@20%": 0.6015625,
|
|
"train_probe_calibration/coverage@25%": 0.640625,
|
|
"train_probe_calibration/coverage@30%": 0.7578125,
|
|
"train_probe_calibration/coverage@5%": 0.2890625,
|
|
"train_probe_calibration/ece": 0.17925781250000003,
|
|
"train_probe_calibration/mean_confidence": 0.6127734375,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 298.5,
|
|
"train_probe_completions/max_terminated_length": 298.5,
|
|
"train_probe_completions/mean_length": 161.12843322753906,
|
|
"train_probe_completions/mean_terminated_length": 161.12843322753906,
|
|
"train_probe_completions/min_length": 94.5,
|
|
"train_probe_completions/min_terminated_length": 94.5,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 669939618.0,
|
|
"train_probe_reward": 1.06145441532135,
|
|
"train_probe_reward_std": 0.22316357120871544,
|
|
"train_probe_rewards/accuracy_reward": 0.66796875,
|
|
"train_probe_rewards/brier_reward": 0.8423600494861603,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.89111328125,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0010826691941474564,
|
|
"train_probe_rewards/frontier_coverage_1": 0.0965785188600421,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0965785188600421,
|
|
"train_probe_rewards/frontier_coverage_15": 0.07919098529964685,
|
|
"train_probe_rewards/frontier_coverage_20": 0.07169051561504602,
|
|
"train_probe_rewards/frontier_coverage_25": 0.13048473186790943,
|
|
"train_probe_rewards/frontier_coverage_5": 0.0965785188600421,
|
|
"train_probe_rewards/frontier_ece_reward": 0.0046604592353105545,
|
|
"train_probe_runtime": 17.1156,
|
|
"train_probe_samples_per_second": 29.213,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.43115234375,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.47117266058921814,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.215576171875,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.215576171875,
|
|
"train_probe_signal/advantage_abs_mean": 0.19968737289309502,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.19968737289309502,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.22083420678973198,
|
|
"train_probe_signal/advantage_std": 0.22083420678973198,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.1940205954015255,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.2574399895966053,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024252574425190687,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.024252574425190687,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047943115234375,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05579993408173323,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005992889404296875,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005992889404296875,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0022220485552679747,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0040761920099612325,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.977466849391931e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.977466849391931e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.31603457778692245,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.44148707389831543,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005657018744386733,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005657018744386733,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.31603457778692245,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.44148707389831543,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005657018744386733,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005657018744386733,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.22388429939746857,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.31913936883211136,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004007528768852353,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004007528768852353,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.12689215876162052,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.18408489972352982,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002271369507070631,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002271369507070631,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.13436606898903847,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.1626235581934452,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024051525979302824,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024051525979302824,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.31603457778692245,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.44148707389831543,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005657018744386733,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005657018744386733,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.007275205687619746,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.009898353135213256,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009094007109524682,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009094007109524682,
|
|
"train_probe_steps_per_second": 0.234
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29317397217948216,
|
|
"calibration/batch_distribution_entropy": 0.9201408367606426,
|
|
"calibration/buffer_distribution_entropy": 0.9305788418920775,
|
|
"calibration/confidence_entropy": 0.41399374057065597,
|
|
"calibration/coverage@0%": 0.01328125,
|
|
"calibration/coverage@1%": 0.01328125,
|
|
"calibration/coverage@10%": 0.093359375,
|
|
"calibration/coverage@15%": 0.15234375,
|
|
"calibration/coverage@20%": 0.32890625,
|
|
"calibration/coverage@25%": 0.441015625,
|
|
"calibration/coverage@30%": 0.597265625,
|
|
"calibration/coverage@5%": 0.063671875,
|
|
"calibration/ece": 0.13287014590320417,
|
|
"calibration/mean_confidence": 0.5697344575082802,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 395.8,
|
|
"completions/max_terminated_length": 395.8,
|
|
"completions/mean_length": 164.48642578125,
|
|
"completions/mean_terminated_length": 164.48642578125,
|
|
"completions/min_length": 82.6,
|
|
"completions/min_terminated_length": 82.6,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0009725289419293404,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 686480503.0,
|
|
"reward": 1.0179179072380067,
|
|
"reward_std": 0.07355367988348008,
|
|
"rewards/accuracy_reward": 0.57080078125,
|
|
"rewards/brier_reward": 0.8100694179534912,
|
|
"rewards/confidence_uniqueness_reward": 0.9493492126464844,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.00356218283995986,
|
|
"rewards/frontier_coverage_1": 0.13388518393039703,
|
|
"rewards/frontier_coverage_10": 0.13388518393039703,
|
|
"rewards/frontier_coverage_15": 0.10320408940315247,
|
|
"rewards/frontier_coverage_20": 0.07639760747551919,
|
|
"rewards/frontier_coverage_25": 0.09401055723428726,
|
|
"rewards/frontier_coverage_5": 0.13388518393039703,
|
|
"rewards/frontier_ece_reward": 0.004533285135403275,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.086358642578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1176445797085762,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0431793212890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0431793212890625,
|
|
"signal/advantage_abs_mean": 0.05536918267607689,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05536918267607689,
|
|
"signal/advantage_pre_scale_std": 0.10483470559120178,
|
|
"signal/advantage_std": 0.10483470559120178,
|
|
"signal/brier_reward/centered_abs_mean": 0.12581576704978942,
|
|
"signal/brier_reward/group_std_mean": 0.16321150958538055,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015726970881223677,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015726970881223677,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023859953880310057,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03090248741209507,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002982494235038757,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002982494235038757,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004037552513182163,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006974977813661099,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.227218957268633e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.227218957268633e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14410681128501893,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19003032743930817,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002579511888325214,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002579511888325214,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14410681128501893,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19003032743930817,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002579511888325214,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002579511888325214,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10301252007484436,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13664860129356385,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018439240287989379,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018439240287989379,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06870696991682053,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08993822485208511,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001229854696430266,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001229854696430266,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07536256462335586,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09631493389606476,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013489898992702365,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013489898992702365,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14410681128501893,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19003032743930817,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002579511888325214,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002579511888325214,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004476304817944765,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00577198239043355,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005595381022430957,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005595381022430957,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23434450674410473,
|
|
"calibration/batch_distribution_entropy": 0.882002562151535,
|
|
"calibration/buffer_distribution_entropy": 0.9311633807634617,
|
|
"calibration/confidence_entropy": 0.37414299995746625,
|
|
"calibration/coverage@0%": 0.0328125,
|
|
"calibration/coverage@1%": 0.0328125,
|
|
"calibration/coverage@10%": 0.23125,
|
|
"calibration/coverage@15%": 0.301953125,
|
|
"calibration/coverage@20%": 0.412109375,
|
|
"calibration/coverage@25%": 0.5375,
|
|
"calibration/coverage@30%": 0.68828125,
|
|
"calibration/coverage@5%": 0.071484375,
|
|
"calibration/ece": 0.12175364717200503,
|
|
"calibration/mean_confidence": 0.5939089459842449,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 393.4,
|
|
"completions/max_terminated_length": 393.4,
|
|
"completions/mean_length": 165.54716796875,
|
|
"completions/mean_terminated_length": 165.54716796875,
|
|
"completions/min_length": 81.8,
|
|
"completions/min_terminated_length": 81.8,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.000927310436964035,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 703089146.0,
|
|
"reward": 1.024551224708557,
|
|
"reward_std": 0.069550159573555,
|
|
"rewards/accuracy_reward": 0.57529296875,
|
|
"rewards/brier_reward": 0.8332065463066101,
|
|
"rewards/confidence_uniqueness_reward": 0.9376014709472656,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0033317374996840953,
|
|
"rewards/frontier_coverage_1": 0.16638074517250062,
|
|
"rewards/frontier_coverage_10": 0.16638074517250062,
|
|
"rewards/frontier_coverage_15": 0.12547548562288285,
|
|
"rewards/frontier_coverage_20": 0.09414769113063812,
|
|
"rewards/frontier_coverage_25": 0.11692911386489868,
|
|
"rewards/frontier_coverage_5": 0.16638074517250062,
|
|
"rewards/frontier_ece_reward": 0.005235725268721581,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088909912109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11973689049482346,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0444549560546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0444549560546875,
|
|
"signal/advantage_abs_mean": 0.05134270042181015,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05134270042181015,
|
|
"signal/advantage_pre_scale_std": 0.09901983886957169,
|
|
"signal/advantage_std": 0.09901983886957169,
|
|
"signal/brier_reward/centered_abs_mean": 0.12112097889184952,
|
|
"signal/brier_reward/group_std_mean": 0.15756649971008302,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01514012236148119,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01514012236148119,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031430721282958984,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03886085823178291,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003928840160369873,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003928840160369873,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00404848949983716,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0072443762794137,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.246795867104084e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.246795867104084e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1571286678314209,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20347483158111573,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028126030694693325,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028126030694693325,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1571286678314209,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20347483158111573,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028126030694693325,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028126030694693325,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11069501340389251,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14373029470443727,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001981440628878772,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001981440628878772,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07589569091796874,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09735166430473327,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013585327193140983,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013585327193140983,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0746377795934677,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09605260342359542,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013360162265598774,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013360162265598774,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1571286678314209,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20347483158111573,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028126030694693325,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028126030694693325,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004420119524002075,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005659045279026031,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005525149405002594,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005525149405002594,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22316816925244712,
|
|
"calibration/batch_distribution_entropy": 0.8631638001153454,
|
|
"calibration/buffer_distribution_entropy": 0.9303134498916309,
|
|
"calibration/confidence_entropy": 0.36794004434092653,
|
|
"calibration/coverage@0%": 0.0859375,
|
|
"calibration/coverage@1%": 0.109375,
|
|
"calibration/coverage@10%": 0.277734375,
|
|
"calibration/coverage@15%": 0.41875,
|
|
"calibration/coverage@20%": 0.578515625,
|
|
"calibration/coverage@25%": 0.65,
|
|
"calibration/coverage@30%": 0.718359375,
|
|
"calibration/coverage@5%": 0.208984375,
|
|
"calibration/ece": 0.11836120683661663,
|
|
"calibration/mean_confidence": 0.5856790014967167,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 431.6,
|
|
"completions/max_terminated_length": 431.6,
|
|
"completions/mean_length": 171.991015625,
|
|
"completions/mean_terminated_length": 171.991015625,
|
|
"completions/min_length": 84.4,
|
|
"completions/min_terminated_length": 84.4,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0016493016155436635,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 719804254.0,
|
|
"reward": 1.035550093650818,
|
|
"reward_std": 0.07330326288938523,
|
|
"rewards/accuracy_reward": 0.6015625,
|
|
"rewards/brier_reward": 0.8307091474533081,
|
|
"rewards/confidence_uniqueness_reward": 0.9405294418334961,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002609227574430406,
|
|
"rewards/frontier_coverage_1": 0.13768716901540756,
|
|
"rewards/frontier_coverage_10": 0.13768716901540756,
|
|
"rewards/frontier_coverage_15": 0.10304213985800743,
|
|
"rewards/frontier_coverage_20": 0.08185177743434906,
|
|
"rewards/frontier_coverage_25": 0.12052069902420044,
|
|
"rewards/frontier_coverage_5": 0.13768716901540756,
|
|
"rewards/frontier_ece_reward": 0.004790456034243107,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0959228515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13043897598981857,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04796142578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04796142578125,
|
|
"signal/advantage_abs_mean": 0.05425951853394508,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05425951853394508,
|
|
"signal/advantage_pre_scale_std": 0.1053330883383751,
|
|
"signal/advantage_std": 0.1053330883383751,
|
|
"signal/brier_reward/centered_abs_mean": 0.11450777053833008,
|
|
"signal/brier_reward/group_std_mean": 0.15139109492301941,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01431347131729126,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01431347131729126,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02984708845615387,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03796382881700992,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037308860570192336,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037308860570192336,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028274263255298137,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004675904382020235,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.06109277921496e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.06109277921496e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14792871475219727,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1949632316827774,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002647924004122615,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002647924004122615,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14792871475219727,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1949632316827774,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002647924004122615,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002647924004122615,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10406249761581421,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1370965600013733,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018627186771482229,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018627186771482229,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07288601100444794,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09395883530378342,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013046595733612776,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013046595733612776,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07228792309761048,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09277454912662506,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012939537642523645,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012939537642523645,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14792871475219727,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1949632316827774,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002647924004122615,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002647924004122615,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004083223734050989,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0053210449405014515,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005104029667563736,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005104029667563736,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16011508519990608,
|
|
"calibration/batch_distribution_entropy": 0.8181701793524606,
|
|
"calibration/buffer_distribution_entropy": 0.928059463965063,
|
|
"calibration/confidence_entropy": 0.3376720196263639,
|
|
"calibration/coverage@0%": 0.01796875,
|
|
"calibration/coverage@1%": 0.01796875,
|
|
"calibration/coverage@10%": 0.36796875,
|
|
"calibration/coverage@15%": 0.621484375,
|
|
"calibration/coverage@20%": 0.724609375,
|
|
"calibration/coverage@25%": 0.799609375,
|
|
"calibration/coverage@30%": 0.86796875,
|
|
"calibration/coverage@5%": 0.21015625,
|
|
"calibration/ece": 0.08461852811120732,
|
|
"calibration/mean_confidence": 0.6100572855360106,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 441.6,
|
|
"completions/max_terminated_length": 441.6,
|
|
"completions/mean_length": 173.48193359375,
|
|
"completions/mean_terminated_length": 173.48193359375,
|
|
"completions/min_length": 87.4,
|
|
"completions/min_terminated_length": 87.4,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.0007582867401652038,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 736446853.0,
|
|
"reward": 1.0405026197433471,
|
|
"reward_std": 0.06702196821570397,
|
|
"rewards/accuracy_reward": 0.6091796875,
|
|
"rewards/brier_reward": 0.8383830785751343,
|
|
"rewards/confidence_uniqueness_reward": 0.9407512187957764,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.00258009375538677,
|
|
"rewards/frontier_coverage_1": 0.13729006946086883,
|
|
"rewards/frontier_coverage_10": 0.13729006946086883,
|
|
"rewards/frontier_coverage_15": 0.10272664576768875,
|
|
"rewards/frontier_coverage_20": 0.08358165025711059,
|
|
"rewards/frontier_coverage_25": 0.12972914576530456,
|
|
"rewards/frontier_coverage_5": 0.13729006946086883,
|
|
"rewards/frontier_ece_reward": 0.004691596981137991,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08326416015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10747589468955994,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.7,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041632080078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.041632080078125,
|
|
"signal/advantage_abs_mean": 0.051660557836294176,
|
|
"signal/advantage_pre_scale_abs_mean": 0.051660557836294176,
|
|
"signal/advantage_pre_scale_std": 0.09930311441421509,
|
|
"signal/advantage_std": 0.09930311441421509,
|
|
"signal/brier_reward/centered_abs_mean": 0.11549538522958755,
|
|
"signal/brier_reward/group_std_mean": 0.15128278136253356,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014436923153698444,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014436923153698444,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02790890485048294,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.035958658903837204,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034886131063103674,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034886131063103674,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028505324851721527,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004907863447442651,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.102453142171726e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.102453142171726e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14127331972122192,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18485023081302643,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002528792293742299,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002528792293742299,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14127331972122192,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18485023081302643,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002528792293742299,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002528792293742299,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09733048528432846,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1279260739684105,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001742215733975172,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001742215733975172,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06843101680278778,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08805256187915803,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012249151477590203,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012249151477590203,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07552328407764435,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09613100737333298,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013518667314201594,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013518667314201594,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14127331972122192,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18485023081302643,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002528792293742299,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002528792293742299,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003918514354154468,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005041631869971752,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004898142942693084,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004898142942693084,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17004395086361163,
|
|
"calibration/batch_distribution_entropy": 0.8424665156534161,
|
|
"calibration/buffer_distribution_entropy": 0.9242743292015563,
|
|
"calibration/confidence_entropy": 0.35076926772260686,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.369921875,
|
|
"calibration/coverage@15%": 0.55390625,
|
|
"calibration/coverage@20%": 0.665234375,
|
|
"calibration/coverage@25%": 0.746875,
|
|
"calibration/coverage@30%": 0.82890625,
|
|
"calibration/coverage@5%": 0.2203125,
|
|
"calibration/ece": 0.09630225556649823,
|
|
"calibration/mean_confidence": 0.6019314501975569,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 416.2,
|
|
"completions/max_terminated_length": 416.2,
|
|
"completions/mean_length": 174.01953125,
|
|
"completions/mean_terminated_length": 174.01953125,
|
|
"completions/min_length": 84.6,
|
|
"completions/min_terminated_length": 84.6,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0010777495335787535,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 753238669.0,
|
|
"reward": 1.045905351638794,
|
|
"reward_std": 0.07003419697284699,
|
|
"rewards/accuracy_reward": 0.61875,
|
|
"rewards/brier_reward": 0.8430420279502868,
|
|
"rewards/confidence_uniqueness_reward": 0.9419174194335938,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.00205157226882875,
|
|
"rewards/frontier_coverage_1": 0.13308198153972625,
|
|
"rewards/frontier_coverage_10": 0.13073740005493165,
|
|
"rewards/frontier_coverage_15": 0.09758596122264862,
|
|
"rewards/frontier_coverage_20": 0.08409450352191924,
|
|
"rewards/frontier_coverage_25": 0.14153032451868058,
|
|
"rewards/frontier_coverage_5": 0.13308198153972625,
|
|
"rewards/frontier_ece_reward": 0.004457022994756699,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.090283203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12350248396396638,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451416015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451416015625,
|
|
"signal/advantage_abs_mean": 0.05188203603029251,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05188203603029251,
|
|
"signal/advantage_pre_scale_std": 0.10271037220954896,
|
|
"signal/advantage_std": 0.10271037220954896,
|
|
"signal/brier_reward/centered_abs_mean": 0.11129094362258911,
|
|
"signal/brier_reward/group_std_mean": 0.1453452318906784,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013911367952823639,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013911367952823639,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02697415351867676,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0341521717607975,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003371769189834595,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003371769189834595,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022045062622055413,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037422746885567904,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9460661719203925e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9460661719203925e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1369688868522644,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18146575391292571,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024517430458217858,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024517430458217858,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1342229038476944,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17787247598171235,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024025900289416312,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024025900289416312,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08977452963590622,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11959208399057389,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001606964087113738,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001606964087113738,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06367998197674751,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08262477666139603,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011398716131225228,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011398716131225228,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07500105649232865,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09586530327796935,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013425188139081002,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013425188139081002,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1369688868522644,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18146575391292571,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024517430458217858,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024517430458217858,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0035899627022445203,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004766473919153214,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044874533778056503,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044874533778056503,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15345096336525904,
|
|
"calibration/batch_distribution_entropy": 0.8268276483686279,
|
|
"calibration/buffer_distribution_entropy": 0.9194886851924824,
|
|
"calibration/confidence_entropy": 0.3558457240962842,
|
|
"calibration/coverage@0%": 0.159765625,
|
|
"calibration/coverage@1%": 0.20546875,
|
|
"calibration/coverage@10%": 0.430859375,
|
|
"calibration/coverage@15%": 0.4765625,
|
|
"calibration/coverage@20%": 0.64453125,
|
|
"calibration/coverage@25%": 0.752734375,
|
|
"calibration/coverage@30%": 0.855859375,
|
|
"calibration/coverage@5%": 0.36015625,
|
|
"calibration/ece": 0.11590049566667984,
|
|
"calibration/mean_confidence": 0.6436574303181949,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 435.0,
|
|
"completions/max_terminated_length": 435.0,
|
|
"completions/mean_length": 170.905078125,
|
|
"completions/mean_terminated_length": 170.905078125,
|
|
"completions/min_length": 81.4,
|
|
"completions/min_terminated_length": 81.4,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.0008474554633721709,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 769928321.0,
|
|
"reward": 1.0496367454528808,
|
|
"reward_std": 0.06413321122527123,
|
|
"rewards/accuracy_reward": 0.62734375,
|
|
"rewards/brier_reward": 0.8414469003677368,
|
|
"rewards/confidence_uniqueness_reward": 0.9428779602050781,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0020532570313662292,
|
|
"rewards/frontier_coverage_1": 0.12552973330020906,
|
|
"rewards/frontier_coverage_10": 0.12272518426179886,
|
|
"rewards/frontier_coverage_15": 0.09166048467159271,
|
|
"rewards/frontier_coverage_20": 0.08297713249921798,
|
|
"rewards/frontier_coverage_25": 0.14741043150424957,
|
|
"rewards/frontier_coverage_5": 0.12552973330020906,
|
|
"rewards/frontier_ece_reward": 0.004045005375519395,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07962646484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10646625757217407,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039813232421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039813232421875,
|
|
"signal/advantage_abs_mean": 0.04857211783528328,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04857211783528328,
|
|
"signal/advantage_pre_scale_std": 0.09665304124355316,
|
|
"signal/advantage_std": 0.09665304124355316,
|
|
"signal/brier_reward/centered_abs_mean": 0.10986697971820832,
|
|
"signal/brier_reward/group_std_mean": 0.14322306513786315,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01373337246477604,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01373337246477604,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027514719963073732,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.034696760773658755,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034393399953842165,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034393399953842165,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002127653080970049,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035277999471873046,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.808498804573901e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.808498804573901e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13504576981067656,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17763448357582093,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024173191748559477,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024173191748559477,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12984325736761093,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17088421881198884,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002324194274842739,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002324194274842739,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0858396053314209,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11354650110006333,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015365288127213717,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015365288127213717,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06296655610203743,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08114814162254333,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011271013412624598,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011271013412624598,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07672480046749115,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09848825186491013,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001373373856768012,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001373373856768012,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13504576981067656,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17763448357582093,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024173191748559477,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024173191748559477,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0035334643442183735,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00461051557213068,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004416830430272967,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004416830430272967,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23552448749974886,
|
|
"calibration/batch_distribution_entropy": 0.8891411453699248,
|
|
"calibration/buffer_distribution_entropy": 0.9164292924358632,
|
|
"calibration/confidence_entropy": 0.37883301086559096,
|
|
"calibration/coverage@0%": 0.005859375,
|
|
"calibration/coverage@1%": 0.005859375,
|
|
"calibration/coverage@10%": 0.271484375,
|
|
"calibration/coverage@15%": 0.36484375,
|
|
"calibration/coverage@20%": 0.439453125,
|
|
"calibration/coverage@25%": 0.56953125,
|
|
"calibration/coverage@30%": 0.712890625,
|
|
"calibration/coverage@5%": 0.1296875,
|
|
"calibration/ece": 0.13301972715254065,
|
|
"calibration/mean_confidence": 0.5455050746940232,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 494.4,
|
|
"completions/max_terminated_length": 494.4,
|
|
"completions/mean_length": 169.4642578125,
|
|
"completions/mean_terminated_length": 169.4642578125,
|
|
"completions/min_length": 84.8,
|
|
"completions/min_terminated_length": 84.8,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.0008214873378165066,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 786890835.0,
|
|
"reward": 1.0381749868392944,
|
|
"reward_std": 0.06949230208992958,
|
|
"rewards/accuracy_reward": 0.60712890625,
|
|
"rewards/brier_reward": 0.8299033284187317,
|
|
"rewards/confidence_uniqueness_reward": 0.9462954044342041,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002404158003628254,
|
|
"rewards/frontier_coverage_1": 0.12939749360084535,
|
|
"rewards/frontier_coverage_10": 0.12573150247335435,
|
|
"rewards/frontier_coverage_15": 0.08957693502306938,
|
|
"rewards/frontier_coverage_20": 0.07676424533128738,
|
|
"rewards/frontier_coverage_25": 0.1295778825879097,
|
|
"rewards/frontier_coverage_5": 0.12939749360084535,
|
|
"rewards/frontier_ece_reward": 0.003980603208765388,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083404541015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1106999933719635,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0417022705078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0417022705078125,
|
|
"signal/advantage_abs_mean": 0.052297231554985044,
|
|
"signal/advantage_pre_scale_abs_mean": 0.052297231554985044,
|
|
"signal/advantage_pre_scale_std": 0.10435400754213334,
|
|
"signal/advantage_std": 0.10435400754213334,
|
|
"signal/brier_reward/centered_abs_mean": 0.10560693740844726,
|
|
"signal/brier_reward/group_std_mean": 0.14195962697267533,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013200867176055908,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013200867176055908,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024635595083236695,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03171119168400764,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003079449385404587,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003079449385404587,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022906261961907147,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003826328832656145,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.100220685359091e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.100220685359091e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1227890282869339,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16787476241588592,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021979236509650944,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021979236509650944,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11806153655052185,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1615957111120224,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021133014233782887,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021133014233782887,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07612589448690414,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10439666956663132,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013626534724608063,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013626534724608063,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05772598385810852,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07624504715204239,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010332950623705983,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010332950623705983,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0768646091222763,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09990313202142716,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013758764602243901,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013758764602243901,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1227890282869339,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16787476241588592,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021979236509650944,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021979236509650944,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003331187181174755,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00445093372836709,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004163983976468444,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004163983976468444,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19536266164648816,
|
|
"calibration/batch_distribution_entropy": 0.920489897414648,
|
|
"calibration/buffer_distribution_entropy": 0.9151407350609281,
|
|
"calibration/confidence_entropy": 0.40926557599582036,
|
|
"calibration/coverage@0%": 0.073046875,
|
|
"calibration/coverage@1%": 0.09140625,
|
|
"calibration/coverage@10%": 0.42265625,
|
|
"calibration/coverage@15%": 0.50078125,
|
|
"calibration/coverage@20%": 0.593359375,
|
|
"calibration/coverage@25%": 0.65390625,
|
|
"calibration/coverage@30%": 0.7390625,
|
|
"calibration/coverage@5%": 0.28359375,
|
|
"calibration/ece": 0.14984672133896465,
|
|
"calibration/mean_confidence": 0.5657741534754462,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 430.6,
|
|
"completions/max_terminated_length": 430.6,
|
|
"completions/mean_length": 174.3123046875,
|
|
"completions/mean_terminated_length": 174.3123046875,
|
|
"completions/min_length": 82.2,
|
|
"completions/min_terminated_length": 82.2,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.0011468707816675305,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 803608497.0,
|
|
"reward": 1.0229015827178956,
|
|
"reward_std": 0.06426062434911728,
|
|
"rewards/accuracy_reward": 0.56943359375,
|
|
"rewards/brier_reward": 0.8373586058616638,
|
|
"rewards/confidence_uniqueness_reward": 0.9492919921875,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0022013495909050107,
|
|
"rewards/frontier_coverage_1": 0.16131471395492553,
|
|
"rewards/frontier_coverage_10": 0.1579432725906372,
|
|
"rewards/frontier_coverage_15": 0.10936646610498428,
|
|
"rewards/frontier_coverage_20": 0.08728825151920319,
|
|
"rewards/frontier_coverage_25": 0.12453770935535431,
|
|
"rewards/frontier_coverage_5": 0.16131471395492553,
|
|
"rewards/frontier_ece_reward": 0.004330319678410887,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.071881103515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.09971266686916351,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0359405517578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0359405517578125,
|
|
"signal/advantage_abs_mean": 0.04755199551582336,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04755199551582336,
|
|
"signal/advantage_pre_scale_std": 0.09524376839399337,
|
|
"signal/advantage_std": 0.09524376839399337,
|
|
"signal/brier_reward/centered_abs_mean": 0.10900415778160095,
|
|
"signal/brier_reward/group_std_mean": 0.14201750457286835,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01362551972270012,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01362551972270012,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021919608116149902,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02759426794946194,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002739951014518738,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002739951014518738,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00208567357622087,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037588839419186114,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.733355588337872e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.733355588337872e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1343323200941086,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17623608708381652,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002404548367485404,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002404548367485404,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12876609861850738,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16904014348983765,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023049130104482174,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023049130104482174,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08204463869333267,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10765648931264878,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014685989357531072,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014685989357531072,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06093166768550873,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07744322419166565,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001090676779858768,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001090676779858768,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07726499885320663,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0991871863603592,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013830434065312148,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013830434065312148,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1343323200941086,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17623608708381652,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002404548367485404,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002404548367485404,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003368105459958315,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0044054843485355375,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004210131824947894,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004210131824947894,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25572485838844444,
|
|
"calibration/batch_distribution_entropy": 0.8792167443629764,
|
|
"calibration/buffer_distribution_entropy": 0.9126122665055396,
|
|
"calibration/confidence_entropy": 0.3721657687613595,
|
|
"calibration/coverage@0%": 0.003515625,
|
|
"calibration/coverage@1%": 0.003515625,
|
|
"calibration/coverage@10%": 0.14140625,
|
|
"calibration/coverage@15%": 0.30270521242632614,
|
|
"calibration/coverage@20%": 0.38070205058939094,
|
|
"calibration/coverage@25%": 0.5285839268172887,
|
|
"calibration/coverage@30%": 0.6657830918467583,
|
|
"calibration/coverage@5%": 0.078515625,
|
|
"calibration/ece": 0.14647233710394375,
|
|
"calibration/mean_confidence": 0.5808501109260684,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 768.2,
|
|
"completions/max_terminated_length": 550.6,
|
|
"completions/mean_length": 174.3005859375,
|
|
"completions/mean_terminated_length": 174.03550720214844,
|
|
"completions/min_length": 82.4,
|
|
"completions/min_terminated_length": 82.4,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.000932548544369638,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 820567703.0,
|
|
"reward": 1.042500340938568,
|
|
"reward_std": 0.07330340743064881,
|
|
"rewards/accuracy_reward": 0.62099609375,
|
|
"rewards/brier_reward": 0.819736099243164,
|
|
"rewards/confidence_uniqueness_reward": 0.9482918739318847,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0020144137553870676,
|
|
"rewards/frontier_coverage_1": 0.10770976990461349,
|
|
"rewards/frontier_coverage_10": 0.10597532391548156,
|
|
"rewards/frontier_coverage_15": 0.07634644880890847,
|
|
"rewards/frontier_coverage_20": 0.071499665081501,
|
|
"rewards/frontier_coverage_25": 0.13427656888961792,
|
|
"rewards/frontier_coverage_5": 0.10770976990461349,
|
|
"rewards/frontier_ece_reward": 0.003026763442903757,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091192626953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12209666967391967,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455963134765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0455963134765625,
|
|
"signal/advantage_abs_mean": 0.05459719970822334,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05459719970822334,
|
|
"signal/advantage_pre_scale_std": 0.10620496869087219,
|
|
"signal/advantage_std": 0.10620496869087219,
|
|
"signal/brier_reward/centered_abs_mean": 0.11177153140306473,
|
|
"signal/brier_reward/group_std_mean": 0.14610156714916228,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013971441425383091,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013971441425383091,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02218124717473984,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02863222174346447,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00277265589684248,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00277265589684248,
|
|
"signal/format_reward/centered_abs_mean": 0.000555419921875,
|
|
"signal/format_reward/group_std_mean": 0.0013209730386734009,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020621836418285968,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003778617037460208,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.691308629640844e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.691308629640844e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13576821088790894,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1774687796831131,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002430250868201256,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002430250868201256,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12766512483358383,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16704229712486268,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002285205526277423,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002285205526277423,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.080125692486763,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10491674393415451,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001434249896556139,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001434249896556139,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06089780628681183,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07763027101755142,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010900706751272083,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010900706751272083,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0802333727478981,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10428185015916824,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014361773384734989,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014361773384734989,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13576821088790894,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1774687796831131,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002430250868201256,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002430250868201256,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003358669299632311,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004410902410745621,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004198336624540389,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004198336624540389,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1811316225816756,
|
|
"calibration/batch_distribution_entropy": 0.8751262833350175,
|
|
"calibration/buffer_distribution_entropy": 0.910228495124134,
|
|
"calibration/confidence_entropy": 0.3729012404104689,
|
|
"calibration/coverage@0%": 0.034375,
|
|
"calibration/coverage@1%": 0.034375,
|
|
"calibration/coverage@10%": 0.305859375,
|
|
"calibration/coverage@15%": 0.504296875,
|
|
"calibration/coverage@20%": 0.6296875,
|
|
"calibration/coverage@25%": 0.739453125,
|
|
"calibration/coverage@30%": 0.812109375,
|
|
"calibration/coverage@5%": 0.15546875,
|
|
"calibration/ece": 0.10429695437782113,
|
|
"calibration/mean_confidence": 0.5453522137018766,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 425.0,
|
|
"completions/max_terminated_length": 425.0,
|
|
"completions/mean_length": 175.34814453125,
|
|
"completions/mean_terminated_length": 175.34814453125,
|
|
"completions/min_length": 84.8,
|
|
"completions/min_terminated_length": 84.8,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0009130456601269543,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 837373828.0,
|
|
"reward": 1.0609445571899414,
|
|
"reward_std": 0.06633923426270485,
|
|
"rewards/accuracy_reward": 0.64677734375,
|
|
"rewards/brier_reward": 0.8513461947441101,
|
|
"rewards/confidence_uniqueness_reward": 0.9482261657714843,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0015828640898689628,
|
|
"rewards/frontier_coverage_1": 0.11865575462579728,
|
|
"rewards/frontier_coverage_10": 0.1139179825782776,
|
|
"rewards/frontier_coverage_15": 0.08259946554899215,
|
|
"rewards/frontier_coverage_20": 0.08261324763298035,
|
|
"rewards/frontier_coverage_25": 0.16533060371875763,
|
|
"rewards/frontier_coverage_5": 0.11865575462579728,
|
|
"rewards/frontier_ece_reward": 0.0034714728593826295,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088458251953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11326353400945663,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442291259765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0442291259765625,
|
|
"signal/advantage_abs_mean": 0.051153923571109775,
|
|
"signal/advantage_pre_scale_abs_mean": 0.051153923571109775,
|
|
"signal/advantage_pre_scale_std": 0.10137955248355865,
|
|
"signal/advantage_std": 0.10137955248355865,
|
|
"signal/brier_reward/centered_abs_mean": 0.10467512607574463,
|
|
"signal/brier_reward/group_std_mean": 0.13507361710071564,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013084390759468078,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013084390759468078,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02194211483001709,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028182218968868255,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002742764353752136,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002742764353752136,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001707544713281095,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002929617092013359,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.056504938285798e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.056504938285798e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13313665091991425,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17197324931621552,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002383145969361067,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002383145969361067,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12428333461284638,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16083629578351974,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002224671561270952,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002224671561270952,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07507807612419129,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09738193154335022,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013438975671306252,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013438975671306252,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.058079701662063596,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07403742522001266,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001039626623969525,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001039626623969525,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0810657873749733,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10496636033058167,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001451077568344772,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001451077568344772,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13313665091991425,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17197324931621552,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002383145969361067,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002383145969361067,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003261947957798839,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0042387610767036675,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00040774349472485485,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00040774349472485485,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.5180207117377988,
|
|
"eval_calibration/batch_distribution_entropy": 0.8033441147208936,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9092439339504332,
|
|
"eval_calibration/confidence_entropy": 0.34970418230705264,
|
|
"eval_calibration/coverage@0%": 0.03125,
|
|
"eval_calibration/coverage@1%": 0.03125,
|
|
"eval_calibration/coverage@10%": 0.03125,
|
|
"eval_calibration/coverage@15%": 0.03125,
|
|
"eval_calibration/coverage@20%": 0.046875,
|
|
"eval_calibration/coverage@25%": 0.046875,
|
|
"eval_calibration/coverage@30%": 0.078125,
|
|
"eval_calibration/coverage@5%": 0.03125,
|
|
"eval_calibration/ece": 0.2625827961395344,
|
|
"eval_calibration/mean_confidence": 0.5134601332432174,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 325.0,
|
|
"eval_completions/max_terminated_length": 325.0,
|
|
"eval_completions/mean_length": 181.17066192626953,
|
|
"eval_completions/mean_terminated_length": 181.17066192626953,
|
|
"eval_completions/min_length": 103.25,
|
|
"eval_completions/min_terminated_length": 103.25,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 837373828.0,
|
|
"eval_reward": 0.9405190795660019,
|
|
"eval_reward_std": 0.24622543156147003,
|
|
"eval_rewards/accuracy_reward": 0.4296875,
|
|
"eval_rewards/brier_reward": 0.7792998254299164,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.89208984375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.005082366755232215,
|
|
"eval_rewards/frontier_coverage_1": 0.2168629802763462,
|
|
"eval_rewards/frontier_coverage_10": 0.20569873228669167,
|
|
"eval_rewards/frontier_coverage_15": 0.1242841575294733,
|
|
"eval_rewards/frontier_coverage_20": 0.0813782811164856,
|
|
"eval_rewards/frontier_coverage_25": 0.06741005275398493,
|
|
"eval_rewards/frontier_coverage_5": 0.2168629802763462,
|
|
"eval_rewards/frontier_ece_reward": 0.0040713000344112515,
|
|
"eval_runtime": 17.9812,
|
|
"eval_samples_per_second": 27.807,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.47216796875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4931754469871521,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.236083984375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.236083984375,
|
|
"eval_signal/advantage_abs_mean": 0.22818677872419357,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22818677872419357,
|
|
"eval_signal/advantage_pre_scale_std": 0.24369388818740845,
|
|
"eval_signal/advantage_std": 0.24369388818740845,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.24806179851293564,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2987174764275551,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031007724814116955,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.031007724814116955,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04681396484375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05593178328126669,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00585174560546875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00585174560546875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007204441004432738,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.015742348041385412,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00012895949657831807,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00012895949657831807,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3721674680709839,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.456407867372036,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0066617976408451796,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0066617976408451796,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.35168465226888657,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4315572455525398,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0062951549189165235,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0062951549189165235,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2018623724579811,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.2502391189336777,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003613336244598031,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003613336244598031,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.12181077525019646,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.14741826057434082,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021804128773510456,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021804128773510456,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.18845771625638008,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.25003478676080704,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003373392974026501,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003373392974026501,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3721674680709839,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.456407867372036,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0066617976408451796,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0066617976408451796,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.00661488005425781,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.008529237005859613,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008268600067822263,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008268600067822263,
|
|
"eval_steps_per_second": 0.222,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"step": 250,
|
|
"train_probe_calibration/aurc": 0.17399068903789838,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.7855592365623258,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.9093291270316723,
|
|
"train_probe_calibration/confidence_entropy": 0.35752119805263033,
|
|
"train_probe_calibration/coverage@0%": 0.265625,
|
|
"train_probe_calibration/coverage@1%": 0.265625,
|
|
"train_probe_calibration/coverage@10%": 0.5390625,
|
|
"train_probe_calibration/coverage@15%": 0.578125,
|
|
"train_probe_calibration/coverage@20%": 0.6796875,
|
|
"train_probe_calibration/coverage@25%": 0.7734375,
|
|
"train_probe_calibration/coverage@30%": 0.875,
|
|
"train_probe_calibration/coverage@5%": 0.265625,
|
|
"train_probe_calibration/ece": 0.2185999273752981,
|
|
"train_probe_calibration/mean_confidence": 0.5888696124059519,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 301.0,
|
|
"train_probe_completions/max_terminated_length": 301.0,
|
|
"train_probe_completions/mean_length": 176.68392944335938,
|
|
"train_probe_completions/mean_terminated_length": 176.68392944335938,
|
|
"train_probe_completions/min_length": 100.25,
|
|
"train_probe_completions/min_terminated_length": 100.25,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 837373828.0,
|
|
"train_probe_reward": 1.0589460730552673,
|
|
"train_probe_reward_std": 0.2253180705010891,
|
|
"train_probe_rewards/accuracy_reward": 0.654296875,
|
|
"train_probe_rewards/brier_reward": 0.8551206290721893,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.8935546875,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0010356987913837656,
|
|
"train_probe_rewards/frontier_coverage_1": 0.12353460118174553,
|
|
"train_probe_rewards/frontier_coverage_10": 0.11712087318301201,
|
|
"train_probe_rewards/frontier_coverage_15": 0.08624438382685184,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0883408710360527,
|
|
"train_probe_rewards/frontier_coverage_25": 0.1752123422920704,
|
|
"train_probe_rewards/frontier_coverage_5": 0.12353460118174553,
|
|
"train_probe_rewards/frontier_ece_reward": 0.0036108798813074827,
|
|
"train_probe_runtime": 16.9813,
|
|
"train_probe_samples_per_second": 29.444,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4420166015625,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.47711893171072006,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22100830078125,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22100830078125,
|
|
"train_probe_signal/advantage_abs_mean": 0.20390921458601952,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20390921458601952,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.22289463132619858,
|
|
"train_probe_signal/advantage_std": 0.22289463132619858,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.17955372482538223,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.24644171074032784,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02244421560317278,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.02244421560317278,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0458221435546875,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.054768980480730534,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0057277679443359375,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0057277679443359375,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0018088824581354856,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.003385799122042954,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.237899409214151e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.237899409214151e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.34778689593076706,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4604829102754593,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006225385353900492,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006225385353900492,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.32435665279626846,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.431157648563385,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005805984023027122,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005805984023027122,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.18024399504065514,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.24868060275912285,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032263672328554094,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032263672328554094,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.10621210373938084,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.13919900357723236,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019011966069228947,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019011966069228947,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.18446215242147446,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.2188771776854992,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003301872464362532,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003301872464362532,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.34778689593076706,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4604829102754593,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006225385353900492,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006225385353900492,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.005974971689283848,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.008375309873372316,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000746871461160481,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000746871461160481,
|
|
"train_probe_steps_per_second": 0.236
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18597314571416296,
|
|
"calibration/batch_distribution_entropy": 0.8201437592495328,
|
|
"calibration/buffer_distribution_entropy": 0.9071677287932329,
|
|
"calibration/confidence_entropy": 0.3449699186391227,
|
|
"calibration/coverage@0%": 0.006640625,
|
|
"calibration/coverage@1%": 0.006640625,
|
|
"calibration/coverage@10%": 0.2734375,
|
|
"calibration/coverage@15%": 0.45234375,
|
|
"calibration/coverage@20%": 0.64765625,
|
|
"calibration/coverage@25%": 0.739453125,
|
|
"calibration/coverage@30%": 0.872265625,
|
|
"calibration/coverage@5%": 0.059375,
|
|
"calibration/ece": 0.13387027546349461,
|
|
"calibration/mean_confidence": 0.6017411624426133,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 646.4,
|
|
"completions/max_terminated_length": 421.2,
|
|
"completions/mean_length": 175.79228515625,
|
|
"completions/mean_terminated_length": 175.66018371582032,
|
|
"completions/min_length": 87.8,
|
|
"completions/min_terminated_length": 87.8,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.0009323036065325141,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 854273109.0,
|
|
"reward": 1.0501249313354493,
|
|
"reward_std": 0.06620060950517655,
|
|
"rewards/accuracy_reward": 0.64033203125,
|
|
"rewards/brier_reward": 0.8141647100448608,
|
|
"rewards/confidence_uniqueness_reward": 0.9440381526947021,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0026771835517138244,
|
|
"rewards/frontier_coverage_1": 0.08918848186731339,
|
|
"rewards/frontier_coverage_10": 0.08616004511713982,
|
|
"rewards/frontier_coverage_15": 0.06467956006526947,
|
|
"rewards/frontier_coverage_20": 0.07265233993530273,
|
|
"rewards/frontier_coverage_25": 0.15391016006469727,
|
|
"rewards/frontier_coverage_5": 0.08918848186731339,
|
|
"rewards/frontier_ece_reward": 0.0026550061535090207,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.078704833984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10836423933506012,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0393524169921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0393524169921875,
|
|
"signal/advantage_abs_mean": 0.049061907827854155,
|
|
"signal/advantage_pre_scale_abs_mean": 0.049061907827854155,
|
|
"signal/advantage_pre_scale_std": 0.09836698472499847,
|
|
"signal/advantage_std": 0.09836698472499847,
|
|
"signal/brier_reward/centered_abs_mean": 0.1172541081905365,
|
|
"signal/brier_reward/group_std_mean": 0.1511300802230835,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014656763523817062,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014656763523817062,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024897144734859468,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.032081881910562514,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031121430918574335,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031121430918574335,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027089090086519717,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0045408796519041065,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.848946919082664e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.848946919082664e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13663374185562133,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1800040602684021,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002445743978023529,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002445743978023529,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1276185154914856,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16832120418548585,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022843712475150825,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022843712475150825,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07773556411266327,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10220663100481034,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013914665207266808,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013914665207266808,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06176744028925896,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07860565781593323,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011056371731683612,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011056371731683612,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08826594352722168,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11294655352830887,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015799603424966335,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015799603424966335,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13663374185562133,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1800040602684021,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002445743978023529,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002445743978023529,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003223916422575712,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004277074383571744,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000402989552821964,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000402989552821964,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23470730705748308,
|
|
"calibration/batch_distribution_entropy": 0.8653603005944364,
|
|
"calibration/buffer_distribution_entropy": 0.9029355697295275,
|
|
"calibration/confidence_entropy": 0.360791347705753,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0484375,
|
|
"calibration/coverage@10%": 0.22890625,
|
|
"calibration/coverage@15%": 0.322265625,
|
|
"calibration/coverage@20%": 0.48125,
|
|
"calibration/coverage@25%": 0.6,
|
|
"calibration/coverage@30%": 0.690234375,
|
|
"calibration/coverage@5%": 0.18515625,
|
|
"calibration/ece": 0.11370581296588214,
|
|
"calibration/mean_confidence": 0.5632247972443706,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 632.6,
|
|
"completions/max_terminated_length": 421.0,
|
|
"completions/mean_length": 179.2326171875,
|
|
"completions/mean_terminated_length": 179.10030822753907,
|
|
"completions/min_length": 87.8,
|
|
"completions/min_terminated_length": 87.8,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0008252764237113297,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 871116803.0,
|
|
"reward": 1.041144061088562,
|
|
"reward_std": 0.06580123379826545,
|
|
"rewards/accuracy_reward": 0.60849609375,
|
|
"rewards/brier_reward": 0.8426036357879638,
|
|
"rewards/confidence_uniqueness_reward": 0.9396020889282226,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0020183057175017895,
|
|
"rewards/frontier_coverage_1": 0.14359851330518722,
|
|
"rewards/frontier_coverage_10": 0.13196637630462646,
|
|
"rewards/frontier_coverage_15": 0.09356682449579239,
|
|
"rewards/frontier_coverage_20": 0.0918369397521019,
|
|
"rewards/frontier_coverage_25": 0.1650959938764572,
|
|
"rewards/frontier_coverage_5": 0.14359851330518722,
|
|
"rewards/frontier_ece_reward": 0.0038169843144714834,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080181884765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11224258989095688,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400909423828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0400909423828125,
|
|
"signal/advantage_abs_mean": 0.04792519509792328,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04792519509792328,
|
|
"signal/advantage_pre_scale_std": 0.09914593994617463,
|
|
"signal/advantage_std": 0.09914593994617463,
|
|
"signal/brier_reward/centered_abs_mean": 0.10063754320144654,
|
|
"signal/brier_reward/group_std_mean": 0.13023419976234435,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012579692900180817,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012579692900180817,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02736304737627506,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.035405050963163376,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034203809220343826,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034203809220343826,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018809714587405325,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032024606596678497,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3669386903056874e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3669386903056874e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12828720062971116,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.167554047703743,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022963409312069414,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022963409312069414,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11622040122747421,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15186418890953063,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020803450839594006,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020803450839594006,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07340935990214348,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09487757980823516,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013140274910256266,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013140274910256266,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.056497588753700256,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07151806354522705,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010113068157806993,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010113068157806993,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07748262286186218,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10127020329236984,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001386938919313252,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001386938919313252,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12828720062971116,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.167554047703743,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022963409312069414,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022963409312069414,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0030659837648272514,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0040200600866228346,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.034375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00038324797060340643,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00038324797060340643,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19863410150802505,
|
|
"calibration/batch_distribution_entropy": 0.8434263072179384,
|
|
"calibration/buffer_distribution_entropy": 0.9009345427966128,
|
|
"calibration/confidence_entropy": 0.36509388162027195,
|
|
"calibration/coverage@0%": 0.01171875,
|
|
"calibration/coverage@1%": 0.01171875,
|
|
"calibration/coverage@10%": 0.35859375,
|
|
"calibration/coverage@15%": 0.484375,
|
|
"calibration/coverage@20%": 0.5640625,
|
|
"calibration/coverage@25%": 0.616796875,
|
|
"calibration/coverage@30%": 0.68828125,
|
|
"calibration/coverage@5%": 0.2640625,
|
|
"calibration/ece": 0.09913607843357039,
|
|
"calibration/mean_confidence": 0.638816903409418,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 515.8,
|
|
"completions/max_terminated_length": 515.8,
|
|
"completions/mean_length": 179.28642578125,
|
|
"completions/mean_terminated_length": 179.28642578125,
|
|
"completions/min_length": 88.6,
|
|
"completions/min_terminated_length": 88.6,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.000993276946246624,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 887967064.0,
|
|
"reward": 1.0350669145584106,
|
|
"reward_std": 0.06454772800207138,
|
|
"rewards/accuracy_reward": 0.59892578125,
|
|
"rewards/brier_reward": 0.8351063370704651,
|
|
"rewards/confidence_uniqueness_reward": 0.9449150085449218,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.00206311687361449,
|
|
"rewards/frontier_coverage_1": 0.1330704927444458,
|
|
"rewards/frontier_coverage_10": 0.12332247197628021,
|
|
"rewards/frontier_coverage_15": 0.08595439046621323,
|
|
"rewards/frontier_coverage_20": 0.08236979991197586,
|
|
"rewards/frontier_coverage_25": 0.15357653945684432,
|
|
"rewards/frontier_coverage_5": 0.1330704927444458,
|
|
"rewards/frontier_ece_reward": 0.003629566542804241,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.073809814453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.09948968291282653,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.709375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0369049072265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0369049072265625,
|
|
"signal/advantage_abs_mean": 0.04851563647389412,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04851563647389412,
|
|
"signal/advantage_pre_scale_std": 0.09739507734775543,
|
|
"signal/advantage_std": 0.09739507734775543,
|
|
"signal/brier_reward/centered_abs_mean": 0.10919748991727829,
|
|
"signal/brier_reward/group_std_mean": 0.14081784188747407,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013649686239659786,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013649686239659786,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024629361182451247,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03201264031231403,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003078670147806406,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003078670147806406,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020340461749583484,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003400903893634677,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6409427048056385e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6409427048056385e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12788800597190858,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16677136719226837,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002289195219054818,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002289195219054818,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11876944452524185,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1548892468214035,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021259729750454427,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021259729750454427,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07295108437538148,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09489114880561829,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013058244483545422,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013058244483545422,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05809517651796341,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07366363406181335,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010399035876616836,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010399035876616836,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08594117909669877,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1105627328157425,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001538347010500729,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001538347010500729,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12788800597190858,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16677136719226837,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002289195219054818,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002289195219054818,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0031425395514816045,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004072493128478527,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00039281744393520056,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00039281744393520056,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1838647683664046,
|
|
"calibration/batch_distribution_entropy": 0.8080613044753928,
|
|
"calibration/buffer_distribution_entropy": 0.899667503332226,
|
|
"calibration/confidence_entropy": 0.35472012648297896,
|
|
"calibration/coverage@0%": 0.015625,
|
|
"calibration/coverage@1%": 0.015625,
|
|
"calibration/coverage@10%": 0.275390625,
|
|
"calibration/coverage@15%": 0.397265625,
|
|
"calibration/coverage@20%": 0.58984375,
|
|
"calibration/coverage@25%": 0.82734375,
|
|
"calibration/coverage@30%": 0.880078125,
|
|
"calibration/coverage@5%": 0.18203125,
|
|
"calibration/ece": 0.13873622892694146,
|
|
"calibration/mean_confidence": 0.6925125915452537,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 476.8,
|
|
"completions/max_terminated_length": 476.8,
|
|
"completions/mean_length": 181.34345703125,
|
|
"completions/mean_terminated_length": 181.34345703125,
|
|
"completions/min_length": 87.8,
|
|
"completions/min_terminated_length": 87.8,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.0011021445970982313,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 904810837.0,
|
|
"reward": 1.054364514350891,
|
|
"reward_std": 0.06596897840499878,
|
|
"rewards/accuracy_reward": 0.6439453125,
|
|
"rewards/brier_reward": 0.8299910545349121,
|
|
"rewards/confidence_uniqueness_reward": 0.9402127981185913,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.001953143556602299,
|
|
"rewards/frontier_coverage_1": 0.09931659996509552,
|
|
"rewards/frontier_coverage_10": 0.09283578842878341,
|
|
"rewards/frontier_coverage_15": 0.06951518058776855,
|
|
"rewards/frontier_coverage_20": 0.07742422819137573,
|
|
"rewards/frontier_coverage_25": 0.16652192324399948,
|
|
"rewards/frontier_coverage_5": 0.09931659996509552,
|
|
"rewards/frontier_ece_reward": 0.0029755703639239074,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0814697265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10998818576335907,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04073486328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04073486328125,
|
|
"signal/advantage_abs_mean": 0.04864993765950203,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04864993765950203,
|
|
"signal/advantage_pre_scale_std": 0.09855391681194306,
|
|
"signal/advantage_std": 0.09855391681194306,
|
|
"signal/brier_reward/centered_abs_mean": 0.10496192872524261,
|
|
"signal/brier_reward/group_std_mean": 0.13740627765655516,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013120241090655326,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013120241090655326,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026168223470449448,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0334943987429142,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003271027933806181,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003271027933806181,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018899486400187015,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030752378050237896,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.38300786097534e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.38300786097534e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12643099427223206,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16757656931877135,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00226311469450593,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00226311469450593,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11652288883924485,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15484984815120698,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020857596304267646,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020857596304267646,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07096642255783081,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09379614144563675,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012702989391982556,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012702989391982556,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05811881348490715,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07434172034263611,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010403267107903958,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010403267107903958,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08379273712635041,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10927441716194153,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014998900005593896,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014998900005593896,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12643099427223206,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16757656931877135,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00226311469450593,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00226311469450593,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.002925369096919894,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003875131858512759,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00036567113711498677,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00036567113711498677,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3097481712777975,
|
|
"calibration/batch_distribution_entropy": 0.8705793388016131,
|
|
"calibration/buffer_distribution_entropy": 0.8972102592893678,
|
|
"calibration/confidence_entropy": 0.35807765865513697,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.08438340875733855,
|
|
"calibration/coverage@15%": 0.18757797211350294,
|
|
"calibration/coverage@20%": 0.3360514004403131,
|
|
"calibration/coverage@25%": 0.4349307424168297,
|
|
"calibration/coverage@30%": 0.5115177042563601,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.15975441040071203,
|
|
"calibration/mean_confidence": 0.5834101912294416,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 537.0,
|
|
"completions/max_terminated_length": 537.0,
|
|
"completions/mean_length": 181.69228515625,
|
|
"completions/mean_terminated_length": 181.69228515625,
|
|
"completions/min_length": 88.2,
|
|
"completions/min_terminated_length": 88.2,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0011194439139217138,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 921818438.0,
|
|
"reward": 1.0138964176177978,
|
|
"reward_std": 0.06668102517724037,
|
|
"rewards/accuracy_reward": 0.56083984375,
|
|
"rewards/brier_reward": 0.8156968593597412,
|
|
"rewards/confidence_uniqueness_reward": 0.9443087816238404,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0028428094228729606,
|
|
"rewards/frontier_coverage_1": 0.14721233248710633,
|
|
"rewards/frontier_coverage_10": 0.1358731895685196,
|
|
"rewards/frontier_coverage_15": 0.09130201935768127,
|
|
"rewards/frontier_coverage_20": 0.08002846986055374,
|
|
"rewards/frontier_coverage_25": 0.13434360027313233,
|
|
"rewards/frontier_coverage_5": 0.14721233248710633,
|
|
"rewards/frontier_ece_reward": 0.003603707766160369,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.075372314453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10563235729932785,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0376861572265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0376861572265625,
|
|
"signal/advantage_abs_mean": 0.04881888553500176,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04881888553500176,
|
|
"signal/advantage_pre_scale_std": 0.0986421599984169,
|
|
"signal/advantage_std": 0.0986421599984169,
|
|
"signal/brier_reward/centered_abs_mean": 0.10961353480815887,
|
|
"signal/brier_reward/group_std_mean": 0.14331442713737488,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013701691851019859,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013701691851019859,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0234049953520298,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0299153421074152,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002925624419003725,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002925624419003725,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002696803631260991,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004359624674543738,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.827278316952288e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.827278316952288e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1325247272849083,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1731933742761612,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023721925914287566,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023721925914287566,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12130335420370102,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15855235159397124,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002171329967677593,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002171329967677593,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07543385475873947,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09814363867044448,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013502659741789103,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013502659741789103,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05862127542495728,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07496060281991959,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010493207955732942,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010493207955732942,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08424094766378402,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10983462929725647,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015079128555953504,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015079128555953504,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1325247272849083,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1731933742761612,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023721925914287566,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023721925914287566,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0032404222991317512,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0042282075621187685,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004050527873914689,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004050527873914689,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2298308327612087,
|
|
"calibration/batch_distribution_entropy": 0.857929066616121,
|
|
"calibration/buffer_distribution_entropy": 0.8941218456625043,
|
|
"calibration/confidence_entropy": 0.3534088761944342,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.312109375,
|
|
"calibration/coverage@15%": 0.409375,
|
|
"calibration/coverage@20%": 0.5015625,
|
|
"calibration/coverage@25%": 0.56640625,
|
|
"calibration/coverage@30%": 0.671484375,
|
|
"calibration/coverage@5%": 0.21875,
|
|
"calibration/ece": 0.13779791596429622,
|
|
"calibration/mean_confidence": 0.607771240265492,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 455.2,
|
|
"completions/max_terminated_length": 455.2,
|
|
"completions/mean_length": 181.246484375,
|
|
"completions/mean_terminated_length": 181.246484375,
|
|
"completions/min_length": 83.0,
|
|
"completions/min_terminated_length": 83.0,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.0008350891876034439,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 938785250.0,
|
|
"reward": 1.0364571571350099,
|
|
"reward_std": 0.06273685097694397,
|
|
"rewards/accuracy_reward": 0.60380859375,
|
|
"rewards/brier_reward": 0.8282987594604492,
|
|
"rewards/confidence_uniqueness_reward": 0.9447072267532348,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0020865506259724496,
|
|
"rewards/frontier_coverage_1": 0.12943050265312195,
|
|
"rewards/frontier_coverage_10": 0.11967690885066987,
|
|
"rewards/frontier_coverage_15": 0.08408873230218887,
|
|
"rewards/frontier_coverage_20": 0.08248092979192734,
|
|
"rewards/frontier_coverage_25": 0.15849037170410157,
|
|
"rewards/frontier_coverage_5": 0.12943050265312195,
|
|
"rewards/frontier_ece_reward": 0.0033508573193103074,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080902099609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1085489347577095,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0404510498046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0404510498046875,
|
|
"signal/advantage_abs_mean": 0.046602561324834826,
|
|
"signal/advantage_pre_scale_abs_mean": 0.046602561324834826,
|
|
"signal/advantage_pre_scale_std": 0.09552292376756669,
|
|
"signal/advantage_std": 0.09552292376756669,
|
|
"signal/brier_reward/centered_abs_mean": 0.10379516333341599,
|
|
"signal/brier_reward/group_std_mean": 0.13422942757606507,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012974395416676998,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012974395416676998,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024189457297325134,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03062896504998207,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003023682162165642,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003023682162165642,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017642589285969735,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002880441676825285,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.158023464493454e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.158023464493454e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13164688944816588,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17034226059913635,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023564792238175867,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023564792238175867,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11969798952341079,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15487854182720184,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021425940096378325,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021425940096378325,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07383271306753159,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0950731098651886,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013216054998338223,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013216054998338223,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05718918889760971,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0726585105061531,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010236864443868398,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010236864443868398,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08015549033880234,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10447021871805191,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014347832417115568,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014347832417115568,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13164688944816588,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17034226059913635,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023564792238175867,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023564792238175867,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003003358468413353,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003905038023367524,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.040625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00037541980855166913,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00037541980855166913,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3031863683876841,
|
|
"calibration/batch_distribution_entropy": 0.9113038282259535,
|
|
"calibration/buffer_distribution_entropy": 0.8915214898997718,
|
|
"calibration/confidence_entropy": 0.3954785856124127,
|
|
"calibration/coverage@0%": 0.0125,
|
|
"calibration/coverage@1%": 0.0125,
|
|
"calibration/coverage@10%": 0.10590600538160469,
|
|
"calibration/coverage@15%": 0.25557041952054793,
|
|
"calibration/coverage@20%": 0.3657679488747554,
|
|
"calibration/coverage@25%": 0.451737555039139,
|
|
"calibration/coverage@30%": 0.556863074853229,
|
|
"calibration/coverage@5%": 0.05626834637964775,
|
|
"calibration/ece": 0.15038636450266335,
|
|
"calibration/mean_confidence": 0.5439663131143553,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 725.0,
|
|
"completions/max_terminated_length": 523.0,
|
|
"completions/mean_length": 184.86611328125,
|
|
"completions/mean_terminated_length": 184.7338653564453,
|
|
"completions/min_length": 79.6,
|
|
"completions/min_terminated_length": 79.6,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.000809013785328716,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 955729575.0,
|
|
"reward": 1.0262568235397338,
|
|
"reward_std": 0.06516167744994164,
|
|
"rewards/accuracy_reward": 0.58349609375,
|
|
"rewards/brier_reward": 0.8233543515205384,
|
|
"rewards/confidence_uniqueness_reward": 0.9516326189041138,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0018006491474807263,
|
|
"rewards/frontier_coverage_1": 0.13025247156620026,
|
|
"rewards/frontier_coverage_10": 0.12138088643550873,
|
|
"rewards/frontier_coverage_15": 0.08430371508002281,
|
|
"rewards/frontier_coverage_20": 0.08004055321216583,
|
|
"rewards/frontier_coverage_25": 0.1411813259124756,
|
|
"rewards/frontier_coverage_5": 0.13025247156620026,
|
|
"rewards/frontier_ece_reward": 0.0032947796396911146,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.077459716796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10814465284347534,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0387298583984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0387298583984375,
|
|
"signal/advantage_abs_mean": 0.047399114817380905,
|
|
"signal/advantage_pre_scale_abs_mean": 0.047399114817380905,
|
|
"signal/advantage_pre_scale_std": 0.09288787245750427,
|
|
"signal/advantage_std": 0.09288787245750427,
|
|
"signal/brier_reward/centered_abs_mean": 0.1129148319363594,
|
|
"signal/brier_reward/group_std_mean": 0.14614371061325074,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014114353992044925,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014114353992044925,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0207143172621727,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.026592843234539032,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025892896577715875,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025892896577715875,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014614747371524573,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024610649794340133,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6160396009800026e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6160396009800026e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.145956414937973,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1896394670009613,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026126197073608635,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026126197073608635,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13401967734098436,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17390194535255432,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023989521665498613,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023989521665498613,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08284454345703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10729445815086365,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014829172752797604,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014829172752797604,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.061051695793867114,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07795014530420304,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010928253177553415,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010928253177553415,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08121936470270157,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10639394819736481,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014538265997543931,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014538265997543931,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.145956414937973,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1896394670009613,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026126197073608635,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026126197073608635,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0034357388503849506,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0044421212747693065,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042946735629811883,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042946735629811883,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2553048750948764,
|
|
"calibration/batch_distribution_entropy": 0.928002335596811,
|
|
"calibration/buffer_distribution_entropy": 0.8940186257378736,
|
|
"calibration/confidence_entropy": 0.4102037582335584,
|
|
"calibration/coverage@0%": 0.022265625,
|
|
"calibration/coverage@1%": 0.022265625,
|
|
"calibration/coverage@10%": 0.240234375,
|
|
"calibration/coverage@15%": 0.29453125,
|
|
"calibration/coverage@20%": 0.359375,
|
|
"calibration/coverage@25%": 0.472265625,
|
|
"calibration/coverage@30%": 0.57890625,
|
|
"calibration/coverage@5%": 0.080078125,
|
|
"calibration/ece": 0.11796843240314943,
|
|
"calibration/mean_confidence": 0.5718039136867346,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 514.2,
|
|
"completions/max_terminated_length": 514.2,
|
|
"completions/mean_length": 181.9462890625,
|
|
"completions/mean_terminated_length": 181.9462890625,
|
|
"completions/min_length": 86.6,
|
|
"completions/min_terminated_length": 86.6,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0008436237112618983,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 972619521.0,
|
|
"reward": 1.0353510141372682,
|
|
"reward_std": 0.06413244009017945,
|
|
"rewards/accuracy_reward": 0.606640625,
|
|
"rewards/brier_reward": 0.816413962841034,
|
|
"rewards/confidence_uniqueness_reward": 0.9475692749023438,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0016956059262156487,
|
|
"rewards/frontier_coverage_1": 0.1107865646481514,
|
|
"rewards/frontier_coverage_10": 0.10246082991361619,
|
|
"rewards/frontier_coverage_15": 0.07582455202937126,
|
|
"rewards/frontier_coverage_20": 0.07762015908956528,
|
|
"rewards/frontier_coverage_25": 0.14767933785915374,
|
|
"rewards/frontier_coverage_5": 0.1107865646481514,
|
|
"rewards/frontier_ece_reward": 0.0029826680198311805,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10661050379276275,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.040234375,
|
|
"signal/advantage_abs_mean": 0.04859066754579544,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04859066754579544,
|
|
"signal/advantage_pre_scale_std": 0.0963394895195961,
|
|
"signal/advantage_std": 0.0963394895195961,
|
|
"signal/brier_reward/centered_abs_mean": 0.1108618676662445,
|
|
"signal/brier_reward/group_std_mean": 0.14311706721782685,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013857733458280563,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013857733458280563,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023229575157165526,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.029777427762746812,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029036968946456907,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029036968946456907,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015811802353709937,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027030047960579394,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8303124054218642e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8303124054218642e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14058441817760467,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1819360226392746,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025164610240608455,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025164610240608455,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12620791643857956,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1633853554725647,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022591216024011374,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022591216024011374,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08150058835744858,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10502809584140778,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014588604914024471,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014588604914024471,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06125093549489975,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07751076966524124,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010963917477056385,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010963917477056385,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08199481666088104,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10610374063253403,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014677071943879128,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014677071943879128,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14058441817760467,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1819360226392746,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025164610240608455,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025164610240608455,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003389831865206361,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004321504570543766,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004237289831507951,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004237289831507951,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19053175355084856,
|
|
"calibration/batch_distribution_entropy": 0.9305434608784869,
|
|
"calibration/buffer_distribution_entropy": 0.8955746984723145,
|
|
"calibration/confidence_entropy": 0.40345241743169813,
|
|
"calibration/coverage@0%": 0.092578125,
|
|
"calibration/coverage@1%": 0.155078125,
|
|
"calibration/coverage@10%": 0.2984375,
|
|
"calibration/coverage@15%": 0.404296875,
|
|
"calibration/coverage@20%": 0.55546875,
|
|
"calibration/coverage@25%": 0.697265625,
|
|
"calibration/coverage@30%": 0.80078125,
|
|
"calibration/coverage@5%": 0.1890625,
|
|
"calibration/ece": 0.10843311694105173,
|
|
"calibration/mean_confidence": 0.5410684455589483,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 479.0,
|
|
"completions/max_terminated_length": 479.0,
|
|
"completions/mean_length": 182.3708984375,
|
|
"completions/mean_terminated_length": 182.3708984375,
|
|
"completions/min_length": 88.2,
|
|
"completions/min_terminated_length": 88.2,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.000993796857073903,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 989462423.0,
|
|
"reward": 1.0378493785858154,
|
|
"reward_std": 0.07301433905959129,
|
|
"rewards/accuracy_reward": 0.6052734375,
|
|
"rewards/brier_reward": 0.8297587871551514,
|
|
"rewards/confidence_uniqueness_reward": 0.9487686157226562,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0013114425935782492,
|
|
"rewards/frontier_coverage_1": 0.13030335307121277,
|
|
"rewards/frontier_coverage_10": 0.12149370610713958,
|
|
"rewards/frontier_coverage_15": 0.08871242925524711,
|
|
"rewards/frontier_coverage_20": 0.08486142754554749,
|
|
"rewards/frontier_coverage_25": 0.14197321832180024,
|
|
"rewards/frontier_coverage_5": 0.13030335307121277,
|
|
"rewards/frontier_ece_reward": 0.003458545543253422,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10616455078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13756768852472306,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053082275390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.053082275390625,
|
|
"signal/advantage_abs_mean": 0.056171053647994997,
|
|
"signal/advantage_pre_scale_abs_mean": 0.056171053647994997,
|
|
"signal/advantage_pre_scale_std": 0.10713197886943818,
|
|
"signal/advantage_std": 0.10713197886943818,
|
|
"signal/brier_reward/centered_abs_mean": 0.10598112493753434,
|
|
"signal/brier_reward/group_std_mean": 0.1365533709526062,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013247640617191792,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013247640617191792,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022769904136657713,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02863166332244873,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002846238017082214,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002846238017082214,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011110721388831735,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0018645315431058407,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9888190217898226e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9888190217898226e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14970411360263824,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.195058611035347,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026797034312039613,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026797034312039613,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13364054411649703,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17438722848892213,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023921656422317026,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023921656422317026,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08324484527111053,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10912428945302963,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014900827081874013,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014900827081874013,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05944142565131187,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07634605765342713,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010640014894306659,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010640014894306659,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07600450217723846,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09856143593788147,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001360480533912778,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001360480533912778,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14970411360263824,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.195058611035347,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026797034312039613,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026797034312039613,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0033906072843819858,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004434131644666195,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004238259105477482,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004238259105477482,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20952501494560322,
|
|
"calibration/batch_distribution_entropy": 0.8843125473748732,
|
|
"calibration/buffer_distribution_entropy": 0.8962001707723987,
|
|
"calibration/confidence_entropy": 0.36058761090983166,
|
|
"calibration/coverage@0%": 0.087890625,
|
|
"calibration/coverage@1%": 0.087890625,
|
|
"calibration/coverage@10%": 0.262109375,
|
|
"calibration/coverage@15%": 0.43671875,
|
|
"calibration/coverage@20%": 0.575,
|
|
"calibration/coverage@25%": 0.6765625,
|
|
"calibration/coverage@30%": 0.759375,
|
|
"calibration/coverage@5%": 0.18125,
|
|
"calibration/ece": 0.13082384829872193,
|
|
"calibration/mean_confidence": 0.571152714201278,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 449.2,
|
|
"completions/max_terminated_length": 449.2,
|
|
"completions/mean_length": 179.28154296875,
|
|
"completions/mean_terminated_length": 179.28154296875,
|
|
"completions/min_length": 87.0,
|
|
"completions/min_terminated_length": 87.0,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0006647381815128028,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 1006238586.0,
|
|
"reward": 1.0329123735427856,
|
|
"reward_std": 0.05366669148206711,
|
|
"rewards/accuracy_reward": 0.58798828125,
|
|
"rewards/brier_reward": 0.8446584582328797,
|
|
"rewards/confidence_uniqueness_reward": 0.9450820922851563,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0017721342155709863,
|
|
"rewards/frontier_coverage_1": 0.15945130288600923,
|
|
"rewards/frontier_coverage_10": 0.14589085876941682,
|
|
"rewards/frontier_coverage_15": 0.10300841629505157,
|
|
"rewards/frontier_coverage_20": 0.09736352860927582,
|
|
"rewards/frontier_coverage_25": 0.15814386010169984,
|
|
"rewards/frontier_coverage_5": 0.15945130288600923,
|
|
"rewards/frontier_ece_reward": 0.003961241897195577,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.071124267578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.09625100940465928,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0355621337890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0355621337890625,
|
|
"signal/advantage_abs_mean": 0.039787986874580385,
|
|
"signal/advantage_pre_scale_abs_mean": 0.039787986874580385,
|
|
"signal/advantage_pre_scale_std": 0.08424456864595413,
|
|
"signal/advantage_std": 0.08424456864595413,
|
|
"signal/brier_reward/centered_abs_mean": 0.09390641152858734,
|
|
"signal/brier_reward/group_std_mean": 0.12350601404905319,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011738301441073417,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011738301441073417,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02469801902770996,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.031055227667093278,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003087252378463745,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003087252378463745,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014515453251078725,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023173499619588258,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5982661463785915e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5982661463785915e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1336147144436836,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1740594267845154,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002391703147441149,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002391703147441149,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11904115676879883,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15523334443569184,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002130836620926857,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002130836620926857,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07668739408254624,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09952570647001266,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013727043056860565,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013727043056860565,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05882178023457527,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07488873153924942,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010529098566621543,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010529098566621543,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07149278298020363,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09316664934158325,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001279720780439675,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001279720780439675,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1336147144436836,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1740594267845154,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002391703147441149,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002391703147441149,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0030738627538084984,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0040326244197785854,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003842328442260623,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003842328442260623,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.45877331809546285,
|
|
"eval_calibration/batch_distribution_entropy": 0.8487918639736909,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8943828101486132,
|
|
"eval_calibration/confidence_entropy": 0.37301273139280466,
|
|
"eval_calibration/coverage@0%": 0.03125,
|
|
"eval_calibration/coverage@1%": 0.03125,
|
|
"eval_calibration/coverage@10%": 0.03125,
|
|
"eval_calibration/coverage@15%": 0.0625,
|
|
"eval_calibration/coverage@20%": 0.1953125,
|
|
"eval_calibration/coverage@25%": 0.234375,
|
|
"eval_calibration/coverage@30%": 0.3046875,
|
|
"eval_calibration/coverage@5%": 0.03125,
|
|
"eval_calibration/ece": 0.20022656249999998,
|
|
"eval_calibration/mean_confidence": 0.47538281250000003,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 385.5,
|
|
"eval_completions/max_terminated_length": 385.5,
|
|
"eval_completions/mean_length": 177.91608428955078,
|
|
"eval_completions/mean_terminated_length": 177.91608428955078,
|
|
"eval_completions/min_length": 96.5,
|
|
"eval_completions/min_terminated_length": 96.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1006238586.0,
|
|
"eval_reward": 0.9505706876516342,
|
|
"eval_reward_std": 0.24583137407898903,
|
|
"eval_rewards/accuracy_reward": 0.447265625,
|
|
"eval_rewards/brier_reward": 0.7869907170534134,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.898681640625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0033371542813256383,
|
|
"eval_rewards/frontier_coverage_1": 0.20748823508620262,
|
|
"eval_rewards/frontier_coverage_10": 0.1859576664865017,
|
|
"eval_rewards/frontier_coverage_15": 0.12141189724206924,
|
|
"eval_rewards/frontier_coverage_20": 0.08090419881045818,
|
|
"eval_rewards/frontier_coverage_25": 0.07952974922955036,
|
|
"eval_rewards/frontier_coverage_5": 0.20748823508620262,
|
|
"eval_rewards/frontier_ece_reward": 0.0038945103879086673,
|
|
"eval_runtime": 20.0315,
|
|
"eval_samples_per_second": 24.961,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4803466796875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4976552575826645,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.24017333984375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.24017333984375,
|
|
"eval_signal/advantage_abs_mean": 0.23006105422973633,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.23006105422973633,
|
|
"eval_signal/advantage_pre_scale_std": 0.24323223158717155,
|
|
"eval_signal/advantage_std": 0.24323223158717155,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.23864521458745003,
|
|
"eval_signal/brier_reward/group_std_mean": 0.295004665851593,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029830651823431253,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.029830651823431253,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.044342041015625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05374839436262846,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005542755126953125,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005542755126953125,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004506968369241804,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009769670432433486,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.06747302704025e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.06747302704025e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.37909433990716934,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.46202613413333893,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00678578857332468,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00678578857332468,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3334348201751709,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.40698229521512985,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005968482932075858,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005968482932075858,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.19724000990390778,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.24512441456317902,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003530596033670008,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003530596033670008,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.12327801994979382,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.1503501832485199,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022066764649935067,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022066764649935067,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2011748030781746,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.2626011222600937,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036010288167744875,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036010288167744875,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.37909433990716934,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.46202613413333893,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00678578857332468,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00678578857332468,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006420767167583108,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.00843157060444355,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008025958959478885,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008025958959478885,
|
|
"eval_steps_per_second": 0.2,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"step": 300,
|
|
"train_probe_calibration/aurc": 0.11334329991864908,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.823814016063535,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.8939561434753694,
|
|
"train_probe_calibration/confidence_entropy": 0.3812485580477293,
|
|
"train_probe_calibration/coverage@0%": 0.3515625,
|
|
"train_probe_calibration/coverage@1%": 0.3515625,
|
|
"train_probe_calibration/coverage@10%": 0.6171875,
|
|
"train_probe_calibration/coverage@15%": 0.71875,
|
|
"train_probe_calibration/coverage@20%": 0.7890625,
|
|
"train_probe_calibration/coverage@25%": 0.8671875,
|
|
"train_probe_calibration/coverage@30%": 0.921875,
|
|
"train_probe_calibration/coverage@5%": 0.359375,
|
|
"train_probe_calibration/ece": 0.15804687500000003,
|
|
"train_probe_calibration/mean_confidence": 0.590703125,
|
|
"train_probe_completions/clipped_ratio": 0.001953125,
|
|
"train_probe_completions/max_length": 607.0,
|
|
"train_probe_completions/max_terminated_length": 316.0,
|
|
"train_probe_completions/mean_length": 175.61072158813477,
|
|
"train_probe_completions/mean_terminated_length": 172.95431900024414,
|
|
"train_probe_completions/min_length": 94.75,
|
|
"train_probe_completions/min_terminated_length": 94.75,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 1006238586.0,
|
|
"train_probe_reward": 1.0620156228542328,
|
|
"train_probe_reward_std": 0.22856702283024788,
|
|
"train_probe_rewards/accuracy_reward": 0.66015625,
|
|
"train_probe_rewards/brier_reward": 0.8613701313734055,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.8939720988273621,
|
|
"train_probe_rewards/format_reward": 0.998046875,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0012163210631115362,
|
|
"train_probe_rewards/frontier_coverage_1": 0.12271312065422535,
|
|
"train_probe_rewards/frontier_coverage_10": 0.11114241741597652,
|
|
"train_probe_rewards/frontier_coverage_15": 0.0873615425080061,
|
|
"train_probe_rewards/frontier_coverage_20": 0.09772194363176823,
|
|
"train_probe_rewards/frontier_coverage_25": 0.18827218934893608,
|
|
"train_probe_rewards/frontier_coverage_5": 0.12271312065422535,
|
|
"train_probe_rewards/frontier_ece_reward": 0.003619219409301877,
|
|
"train_probe_runtime": 25.9257,
|
|
"train_probe_samples_per_second": 19.286,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.440185546875,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.4761292338371277,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2200927734375,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2200927734375,
|
|
"train_probe_signal/advantage_abs_mean": 0.2054794505238533,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.2054794505238533,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.22665054351091385,
|
|
"train_probe_signal/advantage_std": 0.22665054351091385,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.1662147231400013,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.23011131957173347,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020776840392500162,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.020776840392500162,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.043102139607071877,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.054641361348330975,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0053877674508839846,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0053877674508839846,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.011048543266952038,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 0.9375,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002128588006598875,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.004736322327516973,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.81017252948368e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.81017252948368e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.33944354206323624,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.44217299669981003,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006076039047911763,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006076039047911763,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.2949482724070549,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.38730061054229736,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005279573961161077,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005279573961161077,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.16588661447167397,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.2280319258570671,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002969370281789452,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002969370281789452,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.10244773887097836,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.12942470982670784,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018338145164307207,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018338145164307207,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.1977926529943943,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.23229693248867989,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003540488425642252,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003540488425642252,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.33944354206323624,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.44217299669981003,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006076039047911763,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006076039047911763,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.005502797896042466,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0075735143618658185,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006878497370053083,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006878497370053083,
|
|
"train_probe_steps_per_second": 0.154
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18582277411444578,
|
|
"calibration/batch_distribution_entropy": 0.9112480015757871,
|
|
"calibration/buffer_distribution_entropy": 0.8951250339277783,
|
|
"calibration/confidence_entropy": 0.39676409676776897,
|
|
"calibration/coverage@0%": 0.08125,
|
|
"calibration/coverage@1%": 0.1515625,
|
|
"calibration/coverage@10%": 0.422265625,
|
|
"calibration/coverage@15%": 0.4765625,
|
|
"calibration/coverage@20%": 0.548046875,
|
|
"calibration/coverage@25%": 0.61015625,
|
|
"calibration/coverage@30%": 0.67890625,
|
|
"calibration/coverage@5%": 0.338671875,
|
|
"calibration/ece": 0.1494024143555333,
|
|
"calibration/mean_confidence": 0.5677664443004726,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 697.6,
|
|
"completions/max_terminated_length": 533.2,
|
|
"completions/mean_length": 178.14248046875,
|
|
"completions/mean_terminated_length": 178.00925903320314,
|
|
"completions/min_length": 82.6,
|
|
"completions/min_terminated_length": 82.6,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.0008968439069576561,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 1022923885.0,
|
|
"reward": 1.0480751514434814,
|
|
"reward_std": 0.05989357978105545,
|
|
"rewards/accuracy_reward": 0.62490234375,
|
|
"rewards/brier_reward": 0.8356807827949524,
|
|
"rewards/confidence_uniqueness_reward": 0.9470009803771973,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.001478810131084174,
|
|
"rewards/frontier_coverage_1": 0.12298977077007293,
|
|
"rewards/frontier_coverage_10": 0.1169760562479496,
|
|
"rewards/frontier_coverage_15": 0.08561454713344574,
|
|
"rewards/frontier_coverage_20": 0.09092361256480216,
|
|
"rewards/frontier_coverage_25": 0.16219930350780487,
|
|
"rewards/frontier_coverage_5": 0.12298977077007293,
|
|
"rewards/frontier_ece_reward": 0.0032111145555973053,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.077911376953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10675206631422043,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0389556884765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0389556884765625,
|
|
"signal/advantage_abs_mean": 0.043699586391448976,
|
|
"signal/advantage_pre_scale_abs_mean": 0.043699586391448976,
|
|
"signal/advantage_pre_scale_std": 0.08926723450422287,
|
|
"signal/advantage_std": 0.08926723450422287,
|
|
"signal/brier_reward/centered_abs_mean": 0.096511709690094,
|
|
"signal/brier_reward/group_std_mean": 0.12847652584314345,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01206396371126175,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01206396371126175,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02388366758823395,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03034769296646118,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029854584485292436,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029854584485292436,
|
|
"signal/format_reward/centered_abs_mean": 0.000555419921875,
|
|
"signal/format_reward/group_std_mean": 0.0013209730386734009,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012161832652054726,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0019433848559856416,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1769678642158397e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1769678642158397e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13997844159603118,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18497555553913117,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002505614003166556,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002505614003166556,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12051929384469987,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15961622595787048,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021572952857241033,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021572952857241033,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07593502700328827,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10026619136333466,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013592369155958294,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013592369155958294,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05694276541471481,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07313971668481827,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010192754562012851,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010192754562012851,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07261455804109573,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09487131386995315,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012998004909604787,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012998004909604787,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13997844159603118,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18497555553913117,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002505614003166556,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002505614003166556,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003159593231976032,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004189403681084514,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.04375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000394949153997004,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000394949153997004,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3200527470519939,
|
|
"calibration/batch_distribution_entropy": 0.8959806922728791,
|
|
"calibration/buffer_distribution_entropy": 0.8960747979400654,
|
|
"calibration/confidence_entropy": 0.3658938308584535,
|
|
"calibration/coverage@0%": 0.014453125,
|
|
"calibration/coverage@1%": 0.014453125,
|
|
"calibration/coverage@10%": 0.043359375,
|
|
"calibration/coverage@15%": 0.11328125,
|
|
"calibration/coverage@20%": 0.298828125,
|
|
"calibration/coverage@25%": 0.423828125,
|
|
"calibration/coverage@30%": 0.569921875,
|
|
"calibration/coverage@5%": 0.019140625,
|
|
"calibration/ece": 0.16820631052342688,
|
|
"calibration/mean_confidence": 0.5135351632988238,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 674.2,
|
|
"completions/max_terminated_length": 451.6,
|
|
"completions/mean_length": 174.34384765625,
|
|
"completions/mean_terminated_length": 174.21048278808593,
|
|
"completions/min_length": 79.2,
|
|
"completions/min_terminated_length": 79.2,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0009708595462143421,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 1039837646.0,
|
|
"reward": 1.015864658355713,
|
|
"reward_std": 0.0644782729446888,
|
|
"rewards/accuracy_reward": 0.56572265625,
|
|
"rewards/brier_reward": 0.8120604991912842,
|
|
"rewards/confidence_uniqueness_reward": 0.9418984651565552,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002516076737083495,
|
|
"rewards/frontier_coverage_1": 0.14565924555063248,
|
|
"rewards/frontier_coverage_10": 0.1338302969932556,
|
|
"rewards/frontier_coverage_15": 0.09351640939712524,
|
|
"rewards/frontier_coverage_20": 0.0904716819524765,
|
|
"rewards/frontier_coverage_25": 0.14130311608314514,
|
|
"rewards/frontier_coverage_5": 0.14565924555063248,
|
|
"rewards/frontier_ece_reward": 0.003355812141671777,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088238525390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11467040479183196,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441192626953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0441192626953125,
|
|
"signal/advantage_abs_mean": 0.04950515627861023,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04950515627861023,
|
|
"signal/advantage_pre_scale_std": 0.09848933815956115,
|
|
"signal/advantage_std": 0.09848933815956115,
|
|
"signal/brier_reward/centered_abs_mean": 0.10926359742879868,
|
|
"signal/brier_reward/group_std_mean": 0.13695080131292342,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013657949678599835,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013657949678599835,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025544070824980735,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.032745585590600965,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003193008853122592,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003193008853122592,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021328864386305213,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003304897760972381,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.817866781901103e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.817866781901103e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14268429577350616,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1820806473493576,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025540488539263608,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025540488539263608,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12527389973402023,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15980836749076843,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022424027556553483,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022424027556553483,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07919367253780366,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10084569156169891,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014175667194649578,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014175667194649578,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.061572205275297165,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0771061822772026,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011021424317732454,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011021424317732454,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07973235845565796,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10166804194450378,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014272091211751103,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014272091211751103,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14268429577350616,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1820806473493576,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025540488539263608,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025540488539263608,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003300653723999858,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004214685643091798,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.04375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00041258171549998226,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00041258171549998226,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1585634225548857,
|
|
"calibration/batch_distribution_entropy": 0.8018340917186807,
|
|
"calibration/buffer_distribution_entropy": 0.8974739698135898,
|
|
"calibration/confidence_entropy": 0.32092406024176,
|
|
"calibration/coverage@0%": 0.1025390625,
|
|
"calibration/coverage@1%": 0.119140625,
|
|
"calibration/coverage@10%": 0.3955078125,
|
|
"calibration/coverage@15%": 0.5185546875,
|
|
"calibration/coverage@20%": 0.6552734375,
|
|
"calibration/coverage@25%": 0.78515625,
|
|
"calibration/coverage@30%": 0.83984375,
|
|
"calibration/coverage@5%": 0.2958984375,
|
|
"calibration/ece": 0.16362565962695846,
|
|
"calibration/mean_confidence": 0.6704694096269584,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 405.0,
|
|
"completions/max_terminated_length": 405.0,
|
|
"completions/mean_length": 172.0516586303711,
|
|
"completions/mean_terminated_length": 172.0516586303711,
|
|
"completions/min_length": 88.0,
|
|
"completions/min_terminated_length": 88.0,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1046548430.0,
|
|
"reward": 1.0416707396507263,
|
|
"reward_std": 0.0726642906665802,
|
|
"rewards/accuracy_reward": 0.6279296875,
|
|
"rewards/brier_reward": 0.7980144023895264,
|
|
"rewards/confidence_uniqueness_reward": 0.9470119476318359,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0022485224180854857,
|
|
"rewards/frontier_coverage_1": 0.08259440585970879,
|
|
"rewards/frontier_coverage_10": 0.07215217500925064,
|
|
"rewards/frontier_coverage_15": 0.059676751494407654,
|
|
"rewards/frontier_coverage_20": 0.07507448270916939,
|
|
"rewards/frontier_coverage_25": 0.14635684341192245,
|
|
"rewards/frontier_coverage_5": 0.08259440585970879,
|
|
"rewards/frontier_ece_reward": 0.002701267832890153,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085662841796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12225553393363953,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0428314208984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0428314208984375,
|
|
"signal/advantage_abs_mean": 0.0527034904807806,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0527034904807806,
|
|
"signal/advantage_pre_scale_std": 0.10466087237000465,
|
|
"signal/advantage_std": 0.10466087237000465,
|
|
"signal/brier_reward/centered_abs_mean": 0.11700525507330894,
|
|
"signal/brier_reward/group_std_mean": 0.15130788832902908,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014625656884163618,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014625656884163618,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023126959800720215,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02852536365389824,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002890869975090027,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002890869975090027,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00226385158021003,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038065230473876,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.052294389111921e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.052294389111921e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13711658865213394,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1796187162399292,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024543870240449905,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024543870240449905,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1192222610116005,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1569937914609909,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002134078531526029,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002134078531526029,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07506273686885834,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09861153736710548,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013436229201033711,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013436229201033711,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.058869652450084686,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07617875188589096,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010537666967138648,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010537666967138648,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08716562017798424,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11405183747410774,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015602644998580217,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015602644998580217,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13711658865213394,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1796187162399292,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024543870240449905,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024543870240449905,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003408772055990994,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004509588470682502,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0390625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042609650699887425,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042609650699887425,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.00471675537865406,
|
|
"train_runtime": 59905.1304,
|
|
"train_samples_per_second": 0.334,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1046548430,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|