Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-highcov-cold-math Source: Original Platform
7368 lines
468 KiB
JSON
7368 lines
468 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.5038101588421379,
|
|
"calibration/batch_distribution_entropy": 0.27496546226898255,
|
|
"calibration/batch_entropy_100bins": 0.34693576898254996,
|
|
"calibration/batch_entropy_10bins": 0.27496546226898255,
|
|
"calibration/batch_entropy_50bins": 0.40406129441079397,
|
|
"calibration/batch_uniqueness": 0.4937993944856712,
|
|
"calibration/confidence_entropy": 0.21554487482269122,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4653161376360508,
|
|
"calibration/mean_confidence": 0.9164561588356814,
|
|
"calibration/prompt_uniqueness": 0.35475345061773306,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01909722222222221,
|
|
"completions/max_length": 4034.2,
|
|
"completions/max_terminated_length": 4034.2,
|
|
"completions/mean_length": 514.2317749023438,
|
|
"completions/mean_terminated_length": 524.245458984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.8,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.005631071049720049,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.0022,
|
|
"num_tokens": 9038158.0,
|
|
"reward": 0.6567242383956909,
|
|
"reward_std": 0.6597963333129883,
|
|
"rewards/accuracy_reward": 0.26137152314186096,
|
|
"rewards/brier_reward": 0.3122586965560913,
|
|
"rewards/confidence_uniqueness_reward": 0.2905979037284851,
|
|
"rewards/format_reward": 0.5998263835906983,
|
|
"rewards/frontier_aurc_reward": 0.27479134798049926,
|
|
"rewards/frontier_coverage_0": 0.27479134798049926,
|
|
"rewards/frontier_coverage_1": 0.27479134798049926,
|
|
"rewards/frontier_coverage_10": 0.27479134798049926,
|
|
"rewards/frontier_coverage_15": 0.27479134798049926,
|
|
"rewards/frontier_coverage_20": 0.27479134798049926,
|
|
"rewards/frontier_coverage_25": 0.27479134798049926,
|
|
"rewards/frontier_coverage_5": 0.27479134798049926,
|
|
"rewards/frontier_ece_reward": 0.27479134798049926,
|
|
"rewards/frontier_entropy_batch_reward": -0.574283504486084,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3058105528354645,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23923611111111112,
|
|
"signal/accuracy_reward/group_std_mean": 0.366261488199234,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0861111119389534,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15290527641773224,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15290527641773224,
|
|
"signal/advantage_abs_mean": 0.5603552341461182,
|
|
"signal/advantage_pre_scale_abs_mean": 0.5603552341461182,
|
|
"signal/advantage_pre_scale_std": 0.6780304908752441,
|
|
"signal/advantage_std": 0.6780304908752441,
|
|
"signal/brier_reward/centered_abs_mean": 0.31586124300956725,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5177083333333333,
|
|
"signal/brier_reward/group_std_mean": 0.36988683938980105,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031586124747991565,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.031586124747991565,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23471923768520356,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.5965277777777778,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2862655222415924,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023471924290060998,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023471924290060998,
|
|
"signal/format_reward/centered_abs_mean": 0.438910585641861,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.4741047382354736,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2194552928209305,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.2194552928209305,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038243749178946016,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038243749178946016,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3059499800205231,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.39826388888888886,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3646116256713867,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030594999343156813,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4482546389102936,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2986111111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4814043164253235,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.044825464487075806,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044825464487075806,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.544070730834551,
|
|
"calibration/batch_distribution_entropy": 0.2873650497976389,
|
|
"calibration/batch_entropy_100bins": 0.3592202535931412,
|
|
"calibration/batch_entropy_10bins": 0.2873650497976389,
|
|
"calibration/batch_entropy_50bins": 0.41561871184308197,
|
|
"calibration/batch_uniqueness": 0.5038269582994224,
|
|
"calibration/confidence_entropy": 0.21970684330493354,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.48642087721955046,
|
|
"calibration/mean_confidence": 0.9148550403550793,
|
|
"calibration/prompt_uniqueness": 0.396802208624482,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017447916666666653,
|
|
"completions/max_length": 4013.0,
|
|
"completions/max_terminated_length": 4013.0,
|
|
"completions/mean_length": 480.61961059570314,
|
|
"completions/mean_terminated_length": 489.2898864746094,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.4,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.005415608175098896,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": -0.0015,
|
|
"num_tokens": 17657616.0,
|
|
"reward": 0.7464439153671265,
|
|
"reward_std": 0.6387369275093079,
|
|
"rewards/accuracy_reward": 0.2859374940395355,
|
|
"rewards/brier_reward": 0.34971494078636167,
|
|
"rewards/confidence_uniqueness_reward": 0.3477144420146942,
|
|
"rewards/format_reward": 0.7115451335906983,
|
|
"rewards/frontier_aurc_reward": 0.302881646156311,
|
|
"rewards/frontier_coverage_0": 0.302881646156311,
|
|
"rewards/frontier_coverage_1": 0.302881646156311,
|
|
"rewards/frontier_coverage_10": 0.302881646156311,
|
|
"rewards/frontier_coverage_15": 0.302881646156311,
|
|
"rewards/frontier_coverage_20": 0.302881646156311,
|
|
"rewards/frontier_coverage_25": 0.302881646156311,
|
|
"rewards/frontier_coverage_5": 0.302881646156311,
|
|
"rewards/frontier_ece_reward": 0.302881646156311,
|
|
"rewards/frontier_entropy_batch_reward": -0.6813170671463012,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.31569010615348814,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.24131944444444448,
|
|
"signal/accuracy_reward/group_std_mean": 0.37748109102249144,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.06944444514811039,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15784505307674407,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15784505307674407,
|
|
"signal/advantage_abs_mean": 0.533976936340332,
|
|
"signal/advantage_pre_scale_abs_mean": 0.533976936340332,
|
|
"signal/advantage_pre_scale_std": 0.6536542654037476,
|
|
"signal/advantage_std": 0.6536542654037476,
|
|
"signal/brier_reward/centered_abs_mean": 0.3154709577560425,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5368055555555555,
|
|
"signal/brier_reward/group_std_mean": 0.36963658332824706,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03154709674417973,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03154709674417973,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22943984270095824,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6229166666666667,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.28361154794692994,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02294398322701454,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02294398322701454,
|
|
"signal/format_reward/centered_abs_mean": 0.3570583701133728,
|
|
"signal/format_reward/group_bin_occupancy": 0.24965277777777778,
|
|
"signal/format_reward/group_std_mean": 0.42141222953796387,
|
|
"signal/format_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1785291850566864,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1785291850566864,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003902577608823776,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003902577608823776,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3122061789035797,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.41736111111111107,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.37071062326431276,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03122062087059021,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38358516097068784,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3041666666666667,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4429487228393555,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0383585162460804,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0383585162460804,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5192184232870535,
|
|
"calibration/batch_distribution_entropy": 0.2858053834632349,
|
|
"calibration/batch_entropy_100bins": 0.359574296826191,
|
|
"calibration/batch_entropy_10bins": 0.2858053834632349,
|
|
"calibration/batch_entropy_50bins": 0.4153101505392646,
|
|
"calibration/batch_uniqueness": 0.5221154909357117,
|
|
"calibration/confidence_entropy": 0.22962418332165271,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.49317452981313137,
|
|
"calibration/mean_confidence": 0.9151010337113051,
|
|
"calibration/prompt_uniqueness": 0.42620232373241934,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01519097222222221,
|
|
"completions/max_length": 3977.2,
|
|
"completions/max_terminated_length": 3977.2,
|
|
"completions/mean_length": 456.0194458007812,
|
|
"completions/mean_terminated_length": 463.06640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 40.2,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.0063781049102544785,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0139,
|
|
"num_tokens": 26012944.0,
|
|
"reward": 0.9145050406455993,
|
|
"reward_std": 0.5620147705078125,
|
|
"rewards/accuracy_reward": 0.3283854126930237,
|
|
"rewards/brier_reward": 0.4270216226577759,
|
|
"rewards/confidence_uniqueness_reward": 0.49301198720932005,
|
|
"rewards/format_reward": 0.9131076216697693,
|
|
"rewards/frontier_aurc_reward": 0.35533509850502015,
|
|
"rewards/frontier_coverage_0": 0.35533509850502015,
|
|
"rewards/frontier_coverage_1": 0.35533509850502015,
|
|
"rewards/frontier_coverage_10": 0.35533509850502015,
|
|
"rewards/frontier_coverage_15": 0.35533509850502015,
|
|
"rewards/frontier_coverage_20": 0.35533509850502015,
|
|
"rewards/frontier_coverage_25": 0.35533509850502015,
|
|
"rewards/frontier_coverage_5": 0.35533509850502015,
|
|
"rewards/frontier_ece_reward": 0.35533509850502015,
|
|
"rewards/frontier_entropy_batch_reward": -0.8695465922355652,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.312841796875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23958333333333334,
|
|
"signal/accuracy_reward/group_std_mean": 0.3740617513656616,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1564208984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1564208984375,
|
|
"signal/advantage_abs_mean": 0.46465239524841306,
|
|
"signal/advantage_pre_scale_abs_mean": 0.46465239524841306,
|
|
"signal/advantage_pre_scale_std": 0.5796213746070862,
|
|
"signal/advantage_std": 0.5796213746070862,
|
|
"signal/brier_reward/centered_abs_mean": 0.2964577376842499,
|
|
"signal/brier_reward/group_bin_occupancy": 0.590625,
|
|
"signal/brier_reward/group_std_mean": 0.35084177255630494,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029645774513483047,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.029645774513483047,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18456263542175294,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6329861111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23556708097457885,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018456263840198515,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018456263840198515,
|
|
"signal/format_reward/centered_abs_mean": 0.14099934846162795,
|
|
"signal/format_reward/group_bin_occupancy": 0.22256944444444446,
|
|
"signal/format_reward/group_std_mean": 0.22586170136928557,
|
|
"signal/format_reward/group_zero_std_frac": 0.21944445110857486,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.07049967423081398,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.07049967423081398,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038287907373160124,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038287907373160124,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.30630324482917787,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4604166666666666,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3642661988735199,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0306303258985281,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2086976408958435,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3149305555555556,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31270697712898254,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06111111324280501,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02086976356804371,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02086976356804371,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.443977124474033,
|
|
"calibration/batch_distribution_entropy": 0.43868894998445,
|
|
"calibration/batch_entropy_100bins": 0.40972118599624574,
|
|
"calibration/batch_entropy_10bins": 0.43868894998445,
|
|
"calibration/batch_entropy_50bins": 0.4759905332800923,
|
|
"calibration/batch_uniqueness": 0.6093611651716276,
|
|
"calibration/buffer_distribution_entropy": 0.32067142272542265,
|
|
"calibration/buffer_entropy_100bins": 0.38150793304591346,
|
|
"calibration/buffer_entropy_10bins": 0.32067142272542265,
|
|
"calibration/buffer_entropy_50bins": 0.440799581674695,
|
|
"calibration/confidence_entropy": 0.32815220193055084,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.024802110817941952,
|
|
"calibration/coverage@30%": 0.05460019151677149,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3718585004618767,
|
|
"calibration/mean_confidence": 0.8746366326346813,
|
|
"calibration/prompt_uniqueness": 0.5123440414977478,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009461805555555536,
|
|
"completions/max_length": 3978.4,
|
|
"completions/max_terminated_length": 3978.4,
|
|
"completions/mean_length": 488.863720703125,
|
|
"completions/mean_terminated_length": 493.5744262695313,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 101.2,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.0008680089376866817,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.006,
|
|
"num_tokens": 34758350.0,
|
|
"reward": 0.9097142934799194,
|
|
"reward_std": 0.34938407242298125,
|
|
"rewards/accuracy_reward": 0.44947916865348814,
|
|
"rewards/brier_reward": 0.5689934015274047,
|
|
"rewards/confidence_uniqueness_reward": 0.6051031112670898,
|
|
"rewards/format_reward": 0.9827256917953491,
|
|
"rewards/frontier_aurc_reward": 0.20144999362528324,
|
|
"rewards/frontier_coverage_0": 0.2116093705408275,
|
|
"rewards/frontier_coverage_1": 0.2116093705408275,
|
|
"rewards/frontier_coverage_10": 0.2116093705408275,
|
|
"rewards/frontier_coverage_15": 0.2116093705408275,
|
|
"rewards/frontier_coverage_20": 0.2116093705408275,
|
|
"rewards/frontier_coverage_25": 0.2116093705408275,
|
|
"rewards/frontier_coverage_5": 0.2116093705408275,
|
|
"rewards/frontier_ece_reward": 0.19258032105863093,
|
|
"rewards/frontier_entropy_batch_reward": -0.9370049834251404,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2896375894546509,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23958333333333334,
|
|
"signal/accuracy_reward/group_std_mean": 0.3589329898357391,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14481879472732545,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14481879472732545,
|
|
"signal/advantage_abs_mean": 0.2791608601808548,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2791608601808548,
|
|
"signal/advantage_pre_scale_std": 0.36436753273010253,
|
|
"signal/advantage_std": 0.36436753273010253,
|
|
"signal/brier_reward/centered_abs_mean": 0.2495465785264969,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6586805555555556,
|
|
"signal/brier_reward/group_std_mean": 0.3062271773815155,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024954657629132272,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024954657629132272,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17777037620544434,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6329861111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.21108138859272002,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017777037993073463,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017777037993073463,
|
|
"signal/format_reward/centered_abs_mean": 0.03138563297688961,
|
|
"signal/format_reward/group_bin_occupancy": 0.16562499999999997,
|
|
"signal/format_reward/group_std_mean": 0.06965429857373237,
|
|
"signal/format_reward/group_zero_std_frac": 0.675,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015692816488444804,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015692816488444804,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.1230311962775886,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6243055555555556,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.15198022853583099,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0015378899362985976,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0015378899362985976,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14054877683520317,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.6204861111111112,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18199999555945395,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14054877683520317,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.6204861111111112,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18199999555945395,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14054877683520317,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.6204861111111112,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18199999555945395,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14054877683520317,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.6204861111111112,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18199999555945395,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14054877683520317,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.6204861111111112,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18199999555945395,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14054877683520317,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.6204861111111112,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18199999555945395,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14054877683520317,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.6204861111111112,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18199999555945395,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.014054877683520317,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2141006052494049,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5802083333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.264327472448349,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.021410060301423072,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.021410060301423072,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10964042991399765,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2777777777777778,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.20423128306865693,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.25555555820465087,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01096404269337654,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01096404269337654,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35032314868572345,
|
|
"calibration/batch_distribution_entropy": 0.614438890013979,
|
|
"calibration/batch_entropy_100bins": 0.4634310297287791,
|
|
"calibration/batch_entropy_10bins": 0.614438890013979,
|
|
"calibration/batch_entropy_50bins": 0.5428867358379499,
|
|
"calibration/batch_uniqueness": 0.7195552397935103,
|
|
"calibration/buffer_distribution_entropy": 0.38324780384371143,
|
|
"calibration/buffer_entropy_100bins": 0.4074862124458683,
|
|
"calibration/buffer_entropy_10bins": 0.38324780384371143,
|
|
"calibration/buffer_entropy_50bins": 0.4720090353275187,
|
|
"calibration/confidence_entropy": 0.41065394887495243,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.005221932114882507,
|
|
"calibration/coverage@15%": 0.0720626631853786,
|
|
"calibration/coverage@20%": 0.13929356175566165,
|
|
"calibration/coverage@25%": 0.3522492768234537,
|
|
"calibration/coverage@30%": 0.4206896551724138,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.23885528444470966,
|
|
"calibration/mean_confidence": 0.8212928714555188,
|
|
"calibration/prompt_uniqueness": 0.6167993494757859,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009809027777777767,
|
|
"completions/max_length": 3817.0,
|
|
"completions/max_terminated_length": 3817.0,
|
|
"completions/mean_length": 543.0953979492188,
|
|
"completions/mean_terminated_length": 548.4666748046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 107.6,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.0006207867991179228,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.0071,
|
|
"num_tokens": 44139257.0,
|
|
"reward": 0.8193583488464355,
|
|
"reward_std": 0.20146073400974274,
|
|
"rewards/accuracy_reward": 0.5576388835906982,
|
|
"rewards/brier_reward": 0.6787164211273193,
|
|
"rewards/confidence_uniqueness_reward": 0.7089365482330322,
|
|
"rewards/format_reward": 0.9884548544883728,
|
|
"rewards/frontier_aurc_reward": -0.0040108742192387584,
|
|
"rewards/frontier_coverage_0": 0.0012683632783591747,
|
|
"rewards/frontier_coverage_1": 0.0012683632783591747,
|
|
"rewards/frontier_coverage_10": 0.0012683632783591747,
|
|
"rewards/frontier_coverage_15": 0.0012683632783591747,
|
|
"rewards/frontier_coverage_20": 0.0012683632783591747,
|
|
"rewards/frontier_coverage_25": 0.0012683632783591747,
|
|
"rewards/frontier_coverage_5": 0.0012683632783591747,
|
|
"rewards/frontier_ece_reward": 0.018515123042743654,
|
|
"rewards/frontier_entropy_batch_reward": -0.9514306664466858,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2662000864744186,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23472222222222222,
|
|
"signal/accuracy_reward/group_std_mean": 0.3329444944858551,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.1222222238779068,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1331000432372093,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1331000432372093,
|
|
"signal/advantage_abs_mean": 0.15778279304504395,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15778279304504395,
|
|
"signal/advantage_pre_scale_std": 0.21362167000770568,
|
|
"signal/advantage_std": 0.21362167000770568,
|
|
"signal/brier_reward/centered_abs_mean": 0.19935325980186464,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7305555555555555,
|
|
"signal/brier_reward/group_std_mean": 0.2499818593263626,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019935326650738716,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019935326650738716,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09970465749502182,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6690972222222223,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.12825550884008408,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009970465674996376,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009970465674996376,
|
|
"signal/format_reward/centered_abs_mean": 0.02030707523226738,
|
|
"signal/format_reward/group_bin_occupancy": 0.14548611111111112,
|
|
"signal/format_reward/group_std_mean": 0.039225579053163526,
|
|
"signal/format_reward/group_zero_std_frac": 0.8361111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01015353761613369,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01015353761613369,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002486881613731384,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7027777777777778,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036755402106791735,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.108602177235298e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.108602177235298e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.04731389135122299,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.07089887708425521,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.04731389135122299,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07089887708425521,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.04731389135122299,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07089887708425521,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04731389135122299,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07089887708425521,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04731389135122299,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07089887708425521,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04731389135122299,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07089887708425521,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.04731389135122299,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07089887708425521,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004731389414519072,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1279986619949341,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6677083333333333,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.16064732670783996,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012799866311252118,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012799866311252118,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08364634066820145,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.24097222222222223,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.16521921157836914,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3694444537162781,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.008364634215831756,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.008364634215831756,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28677085956170895,
|
|
"calibration/batch_distribution_entropy": 0.680235851538608,
|
|
"calibration/batch_entropy_100bins": 0.4680978920662368,
|
|
"calibration/batch_entropy_10bins": 0.680235851538608,
|
|
"calibration/batch_entropy_50bins": 0.5495048823241069,
|
|
"calibration/batch_uniqueness": 0.7290616274911208,
|
|
"calibration/buffer_distribution_entropy": 0.47353364621032734,
|
|
"calibration/buffer_entropy_100bins": 0.443610624399519,
|
|
"calibration/buffer_entropy_10bins": 0.47353364621032734,
|
|
"calibration/buffer_entropy_50bins": 0.5154921368659136,
|
|
"calibration/confidence_entropy": 0.48697551131200045,
|
|
"calibration/coverage@0%": 0.004244966999994339,
|
|
"calibration/coverage@1%": 0.004244966999994339,
|
|
"calibration/coverage@10%": 0.004244966999994339,
|
|
"calibration/coverage@15%": 0.027774378764700226,
|
|
"calibration/coverage@20%": 0.0572825754860117,
|
|
"calibration/coverage@25%": 0.2620508574117625,
|
|
"calibration/coverage@30%": 0.591682610683416,
|
|
"calibration/coverage@5%": 0.004244966999994339,
|
|
"calibration/ece": 0.13998204933279731,
|
|
"calibration/mean_confidence": 0.7710976143451138,
|
|
"calibration/prompt_uniqueness": 0.6168338382498494,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01519097222222221,
|
|
"completions/max_length": 3910.8,
|
|
"completions/max_terminated_length": 3910.8,
|
|
"completions/mean_length": 618.71025390625,
|
|
"completions/mean_terminated_length": 628.3085815429688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.0,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.0011630720691755414,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.0095,
|
|
"num_tokens": 54376719.0,
|
|
"reward": 0.8367651104927063,
|
|
"reward_std": 0.17993904650211334,
|
|
"rewards/accuracy_reward": 0.5965277910232544,
|
|
"rewards/brier_reward": 0.7198675394058227,
|
|
"rewards/confidence_uniqueness_reward": 0.7175234079360961,
|
|
"rewards/format_reward": 0.98359375,
|
|
"rewards/frontier_aurc_reward": -0.003244720213115215,
|
|
"rewards/frontier_coverage_0": -0.006340815802104771,
|
|
"rewards/frontier_coverage_1": -0.006340815802104771,
|
|
"rewards/frontier_coverage_10": -0.006340815802104771,
|
|
"rewards/frontier_coverage_15": -0.006340815802104771,
|
|
"rewards/frontier_coverage_20": -0.006340815802104771,
|
|
"rewards/frontier_coverage_25": -0.006340815802104771,
|
|
"rewards/frontier_coverage_5": -0.006340815802104771,
|
|
"rewards/frontier_ece_reward": 0.02065478153526783,
|
|
"rewards/frontier_entropy_batch_reward": -0.9462109446525574,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2334526836872101,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.22569444444444448,
|
|
"signal/accuracy_reward/group_std_mean": 0.29840933680534365,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.19444444626569748,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11672634184360504,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11672634184360504,
|
|
"signal/advantage_abs_mean": 0.13784168660640717,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13784168660640717,
|
|
"signal/advantage_pre_scale_std": 0.1956336259841919,
|
|
"signal/advantage_std": 0.1956336259841919,
|
|
"signal/brier_reward/centered_abs_mean": 0.1639205902814865,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7708333333333334,
|
|
"signal/brier_reward/group_std_mean": 0.20927065908908843,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01639205850660801,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01639205850660801,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10898690223693848,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6690972222222222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.14019887149333954,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010898690670728683,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010898690670728683,
|
|
"signal/format_reward/centered_abs_mean": 0.02789171002805233,
|
|
"signal/format_reward/group_bin_occupancy": 0.15243055555555554,
|
|
"signal/format_reward/group_std_mean": 0.05320079177618027,
|
|
"signal/format_reward/group_zero_std_frac": 0.7805555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013945855014026166,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013945855014026166,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017630874179303646,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7194444444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026808131486177446,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2038593306206168e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2038593306206168e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.05981260240077972,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.803125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.08423706740140915,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05981260240077972,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.803125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.08423706740140915,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.05981260240077972,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.803125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.08423706740140915,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.05981260240077972,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.803125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08423706740140915,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05981260240077972,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.803125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08423706740140915,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05981260240077972,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.803125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08423706740140915,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05981260240077972,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.803125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.08423706740140915,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005981260538101196,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08587390631437301,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6788194444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.11601630598306656,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008587390463799239,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008587390463799239,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09288413524627685,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.23993055555555554,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.18082668483257294,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3861111104488373,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00928841382265091,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00928841382265091,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22314636175619795,
|
|
"calibration/batch_distribution_entropy": 0.6770926741352014,
|
|
"calibration/batch_entropy_100bins": 0.4808851389304557,
|
|
"calibration/batch_entropy_10bins": 0.6770926741352014,
|
|
"calibration/batch_entropy_50bins": 0.560624595713065,
|
|
"calibration/batch_uniqueness": 0.7191333949569602,
|
|
"calibration/buffer_distribution_entropy": 0.5387056776964307,
|
|
"calibration/buffer_entropy_100bins": 0.46868923324209544,
|
|
"calibration/buffer_entropy_10bins": 0.5387056776964307,
|
|
"calibration/buffer_entropy_50bins": 0.5452981940231859,
|
|
"calibration/confidence_entropy": 0.46155691057526704,
|
|
"calibration/coverage@0%": 0.006835227390771403,
|
|
"calibration/coverage@1%": 0.006835227390771403,
|
|
"calibration/coverage@10%": 0.12299082440394646,
|
|
"calibration/coverage@15%": 0.19852194905110143,
|
|
"calibration/coverage@20%": 0.43376444645744583,
|
|
"calibration/coverage@25%": 0.5982394781787371,
|
|
"calibration/coverage@30%": 0.8,
|
|
"calibration/coverage@5%": 0.006835227390771403,
|
|
"calibration/ece": 0.10934520056155941,
|
|
"calibration/mean_confidence": 0.7851432360640994,
|
|
"calibration/prompt_uniqueness": 0.5947078897595999,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.022222222222222233,
|
|
"completions/max_length": 3787.8,
|
|
"completions/max_terminated_length": 3787.8,
|
|
"completions/mean_length": 656.1105102539062,
|
|
"completions/mean_terminated_length": 671.1053100585938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 204.6,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.0006542339688166976,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0139,
|
|
"num_tokens": 65012552.0,
|
|
"reward": 0.859082019329071,
|
|
"reward_std": 0.1734073728322983,
|
|
"rewards/accuracy_reward": 0.6378472089767456,
|
|
"rewards/brier_reward": 0.7479536890983581,
|
|
"rewards/confidence_uniqueness_reward": 0.7046342015266418,
|
|
"rewards/format_reward": 0.9759548664093017,
|
|
"rewards/frontier_aurc_reward": -0.0026470940094441174,
|
|
"rewards/frontier_coverage_0": -0.0049528153613209724,
|
|
"rewards/frontier_coverage_1": -0.0049528153613209724,
|
|
"rewards/frontier_coverage_10": -0.0049528153613209724,
|
|
"rewards/frontier_coverage_15": -0.0049528153613209724,
|
|
"rewards/frontier_coverage_20": -0.0049528153613209724,
|
|
"rewards/frontier_coverage_25": -0.0049528153613209724,
|
|
"rewards/frontier_coverage_5": -0.0049528153613209724,
|
|
"rewards/frontier_ece_reward": 0.028249557688832284,
|
|
"rewards/frontier_entropy_batch_reward": -0.9240273833274841,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20604383647441865,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21597222222222223,
|
|
"signal/accuracy_reward/group_std_mean": 0.26489012539386747,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.27222221791744233,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10302191823720933,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10302191823720933,
|
|
"signal/advantage_abs_mean": 0.13017865568399428,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13017865568399428,
|
|
"signal/advantage_pre_scale_std": 0.19113859236240388,
|
|
"signal/advantage_std": 0.19113859236240388,
|
|
"signal/brier_reward/centered_abs_mean": 0.15287761092185975,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7430555555555556,
|
|
"signal/brier_reward/group_std_mean": 0.19788565933704377,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015287761203944683,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015287761203944683,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.13704033493995665,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6673611111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16784389913082123,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013704033941030503,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013704033941030503,
|
|
"signal/format_reward/centered_abs_mean": 0.035107421875,
|
|
"signal/format_reward/group_bin_occupancy": 0.15416666666666665,
|
|
"signal/format_reward/group_std_mean": 0.060845568776130676,
|
|
"signal/format_reward/group_zero_std_frac": 0.7666666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0175537109375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0175537109375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018307951977476478,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7076388888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002829930419102311,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.28849399718456e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.28849399718456e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.060160938650369644,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7854166666666668,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.0857668623328209,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.060160938650369644,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7854166666666668,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0857668623328209,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.060160938650369644,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7854166666666668,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0857668623328209,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.060160938650369644,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7854166666666668,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0857668623328209,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.060160938650369644,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7854166666666668,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0857668623328209,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.060160938650369644,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.7854166666666668,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0857668623328209,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.060160938650369644,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7854166666666668,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0857668623328209,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006016094330698251,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07295427918434143,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6996527777777778,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.1002663567662239,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007295427843928337,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007295427843928337,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12212611138820648,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.26041666666666663,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.22245014905929567,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.28055555522441866,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012212611176073552,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012212611176073552,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23646651151545467,
|
|
"calibration/batch_distribution_entropy": 0.6921987832818134,
|
|
"calibration/batch_entropy_100bins": 0.5484838042629623,
|
|
"calibration/batch_entropy_10bins": 0.6921987832818134,
|
|
"calibration/batch_entropy_50bins": 0.6198041888894421,
|
|
"calibration/batch_uniqueness": 0.7559171649993487,
|
|
"calibration/buffer_distribution_entropy": 0.5720327716762361,
|
|
"calibration/buffer_entropy_100bins": 0.4857154481548491,
|
|
"calibration/buffer_entropy_10bins": 0.5720327716762361,
|
|
"calibration/buffer_entropy_50bins": 0.5634428126437894,
|
|
"calibration/confidence_entropy": 0.4215632545793825,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.08446315789473684,
|
|
"calibration/coverage@15%": 0.14356491228070176,
|
|
"calibration/coverage@20%": 0.4184106637350557,
|
|
"calibration/coverage@25%": 0.5767427952208551,
|
|
"calibration/coverage@30%": 0.7565249757858411,
|
|
"calibration/coverage@5%": 0.021333333333333336,
|
|
"calibration/ece": 0.13757307498135848,
|
|
"calibration/mean_confidence": 0.7957711268796196,
|
|
"calibration/prompt_uniqueness": 0.6417560778644721,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017447916666666653,
|
|
"completions/max_length": 3826.8,
|
|
"completions/max_terminated_length": 3826.8,
|
|
"completions/mean_length": 688.469970703125,
|
|
"completions/mean_terminated_length": 700.6824340820312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 203.4,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.0027070348151028156,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0145,
|
|
"num_tokens": 76063246.0,
|
|
"reward": 0.8858483791351318,
|
|
"reward_std": 0.18059354424476623,
|
|
"rewards/accuracy_reward": 0.643749988079071,
|
|
"rewards/brier_reward": 0.7548895835876465,
|
|
"rewards/confidence_uniqueness_reward": 0.7504566788673401,
|
|
"rewards/format_reward": 0.9811631917953492,
|
|
"rewards/frontier_aurc_reward": -0.0025548926088958977,
|
|
"rewards/frontier_coverage_0": 0.005814270488917828,
|
|
"rewards/frontier_coverage_1": 0.005814270488917828,
|
|
"rewards/frontier_coverage_10": 0.005814270488917828,
|
|
"rewards/frontier_coverage_15": 0.005814270488917828,
|
|
"rewards/frontier_coverage_20": 0.005814270488917828,
|
|
"rewards/frontier_coverage_25": 0.005814270488917828,
|
|
"rewards/frontier_coverage_5": 0.005814270488917828,
|
|
"rewards/frontier_ece_reward": 0.03743142113089561,
|
|
"rewards/frontier_entropy_batch_reward": -0.8492406964302063,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19522569477558135,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21597222222222223,
|
|
"signal/accuracy_reward/group_std_mean": 0.258097830414772,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.27222221791744233,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09761284738779068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09761284738779068,
|
|
"signal/advantage_abs_mean": 0.13384985327720642,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13384985327720642,
|
|
"signal/advantage_pre_scale_std": 0.20037249326705933,
|
|
"signal/advantage_std": 0.20037249326705933,
|
|
"signal/brier_reward/centered_abs_mean": 0.15923767983913423,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7100694444444444,
|
|
"signal/brier_reward/group_std_mean": 0.20939326882362366,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01592376921325922,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01592376921325922,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.14255098551511763,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6333333333333333,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.170270636677742,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01425509825348854,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01425509825348854,
|
|
"signal/format_reward/centered_abs_mean": 0.03126627653837204,
|
|
"signal/format_reward/group_bin_occupancy": 0.15243055555555557,
|
|
"signal/format_reward/group_std_mean": 0.05585132986307144,
|
|
"signal/format_reward/group_zero_std_frac": 0.7805555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01563313826918602,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01563313826918602,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027065142057836056,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6763888888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004190942086279392,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3831426480901425e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3831426480901425e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.06825486421585084,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.10302471965551377,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.06825486421585084,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.10302471965551377,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.06825486421585084,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.10302471965551377,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06825486421585084,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10302471965551377,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06825486421585084,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10302471965551377,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06825486421585084,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10302471965551377,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.06825486421585084,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.10302471965551377,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0068254867568612095,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08857372999191285,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.709375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.12290655523538589,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008857373148202896,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008857373148202896,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22595709562301636,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.39618055555555554,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34426335990428925,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06944444496184587,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022595709562301634,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022595709562301634,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2071995810953097,
|
|
"calibration/batch_distribution_entropy": 0.7874181292806145,
|
|
"calibration/batch_entropy_100bins": 0.7681012233343304,
|
|
"calibration/batch_entropy_10bins": 0.7874181292806145,
|
|
"calibration/batch_entropy_50bins": 0.7977830057272545,
|
|
"calibration/batch_uniqueness": 0.8884160159309301,
|
|
"calibration/buffer_distribution_entropy": 0.5995247243762318,
|
|
"calibration/buffer_entropy_100bins": 0.5220286299888405,
|
|
"calibration/buffer_entropy_10bins": 0.5995247243762318,
|
|
"calibration/buffer_entropy_50bins": 0.5964127665323024,
|
|
"calibration/confidence_entropy": 0.3926311926708884,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.09383084341533095,
|
|
"calibration/coverage@15%": 0.2434566848971281,
|
|
"calibration/coverage@20%": 0.4486376535129998,
|
|
"calibration/coverage@25%": 0.8128549818899113,
|
|
"calibration/coverage@30%": 0.9794736842105263,
|
|
"calibration/coverage@5%": 0.036011080332409975,
|
|
"calibration/ece": 0.13640778164325235,
|
|
"calibration/mean_confidence": 0.754592191385538,
|
|
"calibration/prompt_uniqueness": 0.7768243776261947,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020486111111111115,
|
|
"completions/max_length": 3911.0,
|
|
"completions/max_terminated_length": 3911.0,
|
|
"completions/mean_length": 725.5782104492188,
|
|
"completions/mean_terminated_length": 740.8074096679687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 221.6,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.0006444323225878179,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.0187,
|
|
"num_tokens": 87557171.0,
|
|
"reward": 0.933354115486145,
|
|
"reward_std": 0.19789280295372008,
|
|
"rewards/accuracy_reward": 0.6422742962837219,
|
|
"rewards/brier_reward": 0.7597587704658508,
|
|
"rewards/confidence_uniqueness_reward": 0.8830222606658935,
|
|
"rewards/format_reward": 0.9785590291023254,
|
|
"rewards/frontier_aurc_reward": -0.002267755405046046,
|
|
"rewards/frontier_coverage_0": 0.01341271074488759,
|
|
"rewards/frontier_coverage_1": 0.01341271074488759,
|
|
"rewards/frontier_coverage_10": 0.01341271074488759,
|
|
"rewards/frontier_coverage_15": 0.01341271074488759,
|
|
"rewards/frontier_coverage_20": 0.01341271074488759,
|
|
"rewards/frontier_coverage_25": 0.01341271074488759,
|
|
"rewards/frontier_coverage_5": 0.01341271074488759,
|
|
"rewards/frontier_ece_reward": 0.02467528488487005,
|
|
"rewards/frontier_entropy_batch_reward": -0.5316874802112579,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20063476860523224,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21944444444444447,
|
|
"signal/accuracy_reward/group_std_mean": 0.26551105082035065,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2444444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10031738430261612,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10031738430261612,
|
|
"signal/advantage_abs_mean": 0.14899895191192628,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14899895191192628,
|
|
"signal/advantage_pre_scale_std": 0.21511842608451842,
|
|
"signal/advantage_std": 0.21511842608451842,
|
|
"signal/brier_reward/centered_abs_mean": 0.1772002249956131,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7756944444444445,
|
|
"signal/brier_reward/group_std_mean": 0.22932115197181702,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017720023915171624,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017720023915171624,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0835119254887104,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6239583333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11745427399873734,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008351192437112331,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008351192437112331,
|
|
"signal/format_reward/centered_abs_mean": 0.03505316786468029,
|
|
"signal/format_reward/group_bin_occupancy": 0.1579861111111111,
|
|
"signal/format_reward/group_std_mean": 0.06526159271597862,
|
|
"signal/format_reward/group_zero_std_frac": 0.7361111283302307,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017526583932340144,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017526583932340144,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030290879774838688,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6743055555555555,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004711134731769562,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7863600300624964e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7863600300624964e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13084534853696822,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7878472222222223,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18352725505828857,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13084534853696822,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7878472222222223,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18352725505828857,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13084534853696822,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7878472222222223,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18352725505828857,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13084534853696822,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7878472222222223,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18352725505828857,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13084534853696822,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7878472222222223,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18352725505828857,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13084534853696822,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.7878472222222223,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18352725505828857,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13084534853696822,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7878472222222223,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18352725505828857,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.013084535114467144,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08225937336683273,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7489583333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.12157966494560242,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008225937373936176,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008225937373936176,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39211310148239137,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.685763888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4667708516120911,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0392113134264946,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0392113134264946,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3759205151272904,
|
|
"calibration/batch_distribution_entropy": 0.9313989667528583,
|
|
"calibration/batch_entropy_100bins": 0.9212269758760405,
|
|
"calibration/batch_entropy_10bins": 0.9313989667528583,
|
|
"calibration/batch_entropy_50bins": 0.9342826864618716,
|
|
"calibration/batch_uniqueness": 0.9449244879054509,
|
|
"calibration/buffer_distribution_entropy": 0.6569622120148912,
|
|
"calibration/buffer_entropy_100bins": 0.599770542385045,
|
|
"calibration/buffer_entropy_10bins": 0.6569622120148912,
|
|
"calibration/buffer_entropy_50bins": 0.6646205868242869,
|
|
"calibration/confidence_entropy": 0.424989166098224,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.007407407407407407,
|
|
"calibration/coverage@15%": 0.01596355714002773,
|
|
"calibration/coverage@20%": 0.04248083071612483,
|
|
"calibration/coverage@25%": 0.0968950909656792,
|
|
"calibration/coverage@30%": 0.3587556687321393,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.22700673017839854,
|
|
"calibration/mean_confidence": 0.608741047048681,
|
|
"calibration/prompt_uniqueness": 0.8550910596171892,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017708333333333347,
|
|
"completions/max_length": 3661.0,
|
|
"completions/max_terminated_length": 3661.0,
|
|
"completions/mean_length": 720.4263061523437,
|
|
"completions/mean_terminated_length": 733.3709228515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 204.4,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.0005708423559553921,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.0213,
|
|
"num_tokens": 98954082.0,
|
|
"reward": 0.9437960147857666,
|
|
"reward_std": 0.213465416431427,
|
|
"rewards/accuracy_reward": 0.6332465291023255,
|
|
"rewards/brier_reward": 0.7396142482757568,
|
|
"rewards/confidence_uniqueness_reward": 0.9182653784751892,
|
|
"rewards/format_reward": 0.9818576335906982,
|
|
"rewards/frontier_aurc_reward": -0.002504592388868332,
|
|
"rewards/frontier_coverage_0": 0.00757271870970726,
|
|
"rewards/frontier_coverage_1": 0.00757271870970726,
|
|
"rewards/frontier_coverage_10": 0.00757271870970726,
|
|
"rewards/frontier_coverage_15": 0.00757271870970726,
|
|
"rewards/frontier_coverage_20": 0.00757271870970726,
|
|
"rewards/frontier_coverage_25": 0.00757271870970726,
|
|
"rewards/frontier_coverage_5": 0.00757271870970726,
|
|
"rewards/frontier_ece_reward": 0.012652286747470497,
|
|
"rewards/frontier_entropy_batch_reward": -0.3607885718345642,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19530707597732544,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21215277777777777,
|
|
"signal/accuracy_reward/group_std_mean": 0.2530734747648239,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.30277777910232545,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09765353798866272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09765353798866272,
|
|
"signal/advantage_abs_mean": 0.16423482298851014,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16423482298851014,
|
|
"signal/advantage_pre_scale_std": 0.231108620762825,
|
|
"signal/advantage_std": 0.231108620762825,
|
|
"signal/brier_reward/centered_abs_mean": 0.19700363278388977,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8135416666666666,
|
|
"signal/brier_reward/group_std_mean": 0.24845842123031617,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019700363650918006,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019700363650918006,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05617346540093422,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.732638888888889,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08689026236534118,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005617346568033099,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005617346568033099,
|
|
"signal/format_reward/centered_abs_mean": 0.02997504323720932,
|
|
"signal/format_reward/group_bin_occupancy": 0.15381944444444445,
|
|
"signal/format_reward/group_std_mean": 0.05596600547432899,
|
|
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01498752161860466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01498752161860466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026452220510691403,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6597222222222222,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004009249992668629,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3065275420085524e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3065275420085524e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19767657220363616,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8114583333333332,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.267059126496315,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19767657220363616,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8114583333333332,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.267059126496315,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19767657220363616,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8114583333333332,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.267059126496315,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19767657220363616,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8114583333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.267059126496315,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19767657220363616,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8114583333333332,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.267059126496315,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19767657220363616,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8114583333333332,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.267059126496315,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19767657220363616,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8114583333333332,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.267059126496315,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.019767657667398453,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05850343108177185,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0853449210524559,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005850343313068151,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005850343313068151,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3828676402568817,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7503472222222222,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4532123267650604,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038286763429641726,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038286763429641726,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.23050248842126433,
|
|
"eval_calibration/batch_distribution_entropy": 0.8016154652334767,
|
|
"eval_calibration/batch_entropy_100bins": 0.6871765896490878,
|
|
"eval_calibration/batch_entropy_10bins": 0.8016154652334767,
|
|
"eval_calibration/batch_entropy_50bins": 0.7412875223322276,
|
|
"eval_calibration/batch_uniqueness": 0.8857323642906695,
|
|
"eval_calibration/buffer_distribution_entropy": 0.6890677434360816,
|
|
"eval_calibration/buffer_entropy_100bins": 0.6437866546226053,
|
|
"eval_calibration/buffer_entropy_10bins": 0.6890677434360816,
|
|
"eval_calibration/buffer_entropy_50bins": 0.7017015832612122,
|
|
"eval_calibration/confidence_entropy": 0.40555521402257555,
|
|
"eval_calibration/coverage@0%": 0.10131048387096775,
|
|
"eval_calibration/coverage@1%": 0.10131048387096775,
|
|
"eval_calibration/coverage@10%": 0.21959005376344085,
|
|
"eval_calibration/coverage@15%": 0.34677419354838707,
|
|
"eval_calibration/coverage@20%": 0.6031586021505376,
|
|
"eval_calibration/coverage@25%": 0.7580645161290324,
|
|
"eval_calibration/coverage@30%": 0.926747311827957,
|
|
"eval_calibration/coverage@5%": 0.10131048387096775,
|
|
"eval_calibration/ece": 0.23310729201976518,
|
|
"eval_calibration/mean_confidence": 0.6996752747878766,
|
|
"eval_calibration/prompt_uniqueness": 0.8857323642906695,
|
|
"eval_completions/clipped_ratio": 0.013888888888888876,
|
|
"eval_completions/max_length": 2195.1666666666665,
|
|
"eval_completions/max_terminated_length": 2195.1666666666665,
|
|
"eval_completions/mean_length": 699.8819681803385,
|
|
"eval_completions/mean_terminated_length": 709.8047892252604,
|
|
"eval_completions/min_length": 55.666666666666664,
|
|
"eval_completions/min_terminated_length": 263.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 98954082.0,
|
|
"eval_reward": 0.9046729604403178,
|
|
"eval_reward_std": 0.2568800052007039,
|
|
"eval_rewards/accuracy_reward": 0.6414930621782938,
|
|
"eval_rewards/brier_reward": 0.7675978740056356,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8675300975640615,
|
|
"eval_rewards/format_reward": 0.9861111044883728,
|
|
"eval_rewards/frontier_aurc_reward": -0.002697653331172963,
|
|
"eval_rewards/frontier_coverage_0": 0.034657815316071115,
|
|
"eval_rewards/frontier_coverage_1": 0.034657815316071115,
|
|
"eval_rewards/frontier_coverage_10": 0.034657815316071115,
|
|
"eval_rewards/frontier_coverage_15": 0.034657815316071115,
|
|
"eval_rewards/frontier_coverage_20": 0.034657815316071115,
|
|
"eval_rewards/frontier_coverage_25": 0.034657815316071115,
|
|
"eval_rewards/frontier_coverage_5": 0.034657815316071115,
|
|
"eval_rewards/frontier_ece_reward": 0.01742413399430613,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9861111044883728,
|
|
"eval_runtime": 190.0415,
|
|
"eval_samples_per_second": 5.262,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4460177967945735,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4791330099105835,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22300889839728674,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22300889839728674,
|
|
"eval_signal/advantage_abs_mean": 0.2126754273970922,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2126754273970922,
|
|
"eval_signal/advantage_pre_scale_std": 0.2553383409976959,
|
|
"eval_signal/advantage_std": 0.2553383409976959,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2521931653221448,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8506944444444445,
|
|
"eval_signal/brier_reward/group_std_mean": 0.31014010310173035,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025219315973420937,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.025219315973420937,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07159827401240666,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.4791666666666666,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11320321013530095,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007159827587505181,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007159827587505181,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.026692708333333332,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.17361111111111108,
|
|
"eval_signal/format_reward/group_std_mean": 0.07258860146005948,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.611111119389534,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013346354166666666,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.013346354166666666,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0034897019310543933,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5729166666666666,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006318512372672558,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.362127553273846e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.362127553273846e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.20838888734579086,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8645833333333335,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.3282380948464076,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.20838888734579086,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8645833333333335,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.3282380948464076,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.20838888734579086,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8645833333333335,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.3282380948464076,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.20838888734579086,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8645833333333335,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3282380948464076,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.20838888734579086,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8645833333333335,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.3282380948464076,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.20838888734579086,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8645833333333335,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.3282380948464076,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.20838888734579086,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8645833333333335,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.3282380948464076,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.02083888774116834,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.04939149754742781,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9027777777777777,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.07122303297122319,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004939149754742782,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004939149754742782,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.026692708333333332,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.17361111111111108,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07258860146005948,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.611111119389534,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.002669270926465591,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002669270926465591,
|
|
"eval_steps_per_second": 0.032,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24374448404841295,
|
|
"calibration/batch_distribution_entropy": 0.9289752783640071,
|
|
"calibration/batch_entropy_100bins": 0.9246578251741718,
|
|
"calibration/batch_entropy_10bins": 0.9289752783640071,
|
|
"calibration/batch_entropy_50bins": 0.9365362505281345,
|
|
"calibration/batch_uniqueness": 0.9475130563940756,
|
|
"calibration/buffer_distribution_entropy": 0.7049394681050327,
|
|
"calibration/buffer_entropy_100bins": 0.6685656833127869,
|
|
"calibration/buffer_entropy_10bins": 0.7049394681050327,
|
|
"calibration/buffer_entropy_50bins": 0.7214629566595602,
|
|
"calibration/confidence_entropy": 0.4460111133708583,
|
|
"calibration/coverage@0%": 0.01389920880632955,
|
|
"calibration/coverage@1%": 0.01389920880632955,
|
|
"calibration/coverage@10%": 0.04575679394564843,
|
|
"calibration/coverage@15%": 0.17181752678656706,
|
|
"calibration/coverage@20%": 0.5509832004253057,
|
|
"calibration/coverage@25%": 0.6381154642399224,
|
|
"calibration/coverage@30%": 0.7024434499796729,
|
|
"calibration/coverage@5%": 0.026581449166588483,
|
|
"calibration/ece": 0.1462999248914844,
|
|
"calibration/mean_confidence": 0.6280517518430525,
|
|
"calibration/prompt_uniqueness": 0.8650267205744318,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01692708333333335,
|
|
"completions/max_length": 3483.2,
|
|
"completions/max_terminated_length": 3483.2,
|
|
"completions/mean_length": 713.0258666992188,
|
|
"completions/mean_terminated_length": 725.3185424804688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.0005794555763714015,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.02,
|
|
"num_tokens": 110248716.0,
|
|
"reward": 0.9654149889945984,
|
|
"reward_std": 0.20734579861164093,
|
|
"rewards/accuracy_reward": 0.6355902910232544,
|
|
"rewards/brier_reward": 0.7653455853462219,
|
|
"rewards/confidence_uniqueness_reward": 0.9295515418052673,
|
|
"rewards/format_reward": 0.9824652910232544,
|
|
"rewards/frontier_aurc_reward": -0.0020523636834695936,
|
|
"rewards/frontier_coverage_0": 0.0300428228918463,
|
|
"rewards/frontier_coverage_1": 0.0300428228918463,
|
|
"rewards/frontier_coverage_10": 0.0300428228918463,
|
|
"rewards/frontier_coverage_15": 0.0300428228918463,
|
|
"rewards/frontier_coverage_20": 0.0300428228918463,
|
|
"rewards/frontier_coverage_25": 0.0300428228918463,
|
|
"rewards/frontier_coverage_5": 0.0300428228918463,
|
|
"rewards/frontier_ece_reward": 0.012284515798091889,
|
|
"rewards/frontier_entropy_batch_reward": -0.35335326194763184,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18991970717906953,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2125,
|
|
"signal/accuracy_reward/group_std_mean": 0.24853154718875886,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.30000001192092896,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09495985358953477,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09495985358953477,
|
|
"signal/advantage_abs_mean": 0.16074307560920714,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16074307560920714,
|
|
"signal/advantage_pre_scale_std": 0.22943655252456666,
|
|
"signal/advantage_std": 0.22943655252456666,
|
|
"signal/brier_reward/centered_abs_mean": 0.18310473561286927,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8090277777777779,
|
|
"signal/brier_reward/group_std_mean": 0.23206418752670288,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01831047348678112,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01831047348678112,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045183032751083374,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7715277777777778,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07202807888388633,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004518303461372853,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004518303461372853,
|
|
"signal/format_reward/centered_abs_mean": 0.02825520820915699,
|
|
"signal/format_reward/group_bin_occupancy": 0.15104166666666669,
|
|
"signal/format_reward/group_std_mean": 0.05186620131134987,
|
|
"signal/format_reward/group_zero_std_frac": 0.7916666746139527,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014127604104578495,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.014127604104578495,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017425427678972483,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6809027777777777,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002860198658891022,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1781784744234757e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1781784744234757e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20096865594387053,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.804513888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2704664647579193,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20096865594387053,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.804513888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2704664647579193,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20096865594387053,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.804513888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2704664647579193,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20096865594387053,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.804513888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2704664647579193,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20096865594387053,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.804513888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2704664647579193,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20096865594387053,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.804513888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2704664647579193,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20096865594387053,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.804513888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2704664647579193,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.02009686529636383,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.038172975182533264,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.055047205090522765,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038172977045178415,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038172977045178415,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36611982583999636,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7548611111111112,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43662421107292176,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036611984670162204,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036611984670162204,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.286214721782296,
|
|
"calibration/batch_distribution_entropy": 0.9343979772512739,
|
|
"calibration/batch_entropy_100bins": 0.9207935974888588,
|
|
"calibration/batch_entropy_10bins": 0.9343979772512739,
|
|
"calibration/batch_entropy_50bins": 0.9404285160285129,
|
|
"calibration/batch_uniqueness": 0.9529432818966646,
|
|
"calibration/buffer_distribution_entropy": 0.741460205758367,
|
|
"calibration/buffer_entropy_100bins": 0.7172425168033953,
|
|
"calibration/buffer_entropy_10bins": 0.741460205758367,
|
|
"calibration/buffer_entropy_50bins": 0.7623217046840285,
|
|
"calibration/confidence_entropy": 0.4786053268474739,
|
|
"calibration/coverage@0%": 0.006835482317912184,
|
|
"calibration/coverage@1%": 0.006835482317912184,
|
|
"calibration/coverage@10%": 0.08199339007752038,
|
|
"calibration/coverage@15%": 0.3267279418616805,
|
|
"calibration/coverage@20%": 0.3989069448826226,
|
|
"calibration/coverage@25%": 0.4664435090943801,
|
|
"calibration/coverage@30%": 0.7006002191928997,
|
|
"calibration/coverage@5%": 0.006835482317912184,
|
|
"calibration/ece": 0.16097266382692185,
|
|
"calibration/mean_confidence": 0.6059373040768069,
|
|
"calibration/prompt_uniqueness": 0.8695281357757139,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011458333333333326,
|
|
"completions/max_length": 3914.0,
|
|
"completions/max_terminated_length": 3914.0,
|
|
"completions/mean_length": 709.5424438476563,
|
|
"completions/mean_terminated_length": 717.8249145507813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 187.2,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.00047888929839245975,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0162,
|
|
"num_tokens": 121519221.0,
|
|
"reward": 0.9763705134391785,
|
|
"reward_std": 0.196681210398674,
|
|
"rewards/accuracy_reward": 0.615711796283722,
|
|
"rewards/brier_reward": 0.7821933507919312,
|
|
"rewards/confidence_uniqueness_reward": 0.9381864786148071,
|
|
"rewards/format_reward": 0.9881944537162781,
|
|
"rewards/frontier_aurc_reward": -0.001919442624785006,
|
|
"rewards/frontier_coverage_0": 0.06152722500264644,
|
|
"rewards/frontier_coverage_1": 0.06152722500264644,
|
|
"rewards/frontier_coverage_10": 0.06152722500264644,
|
|
"rewards/frontier_coverage_15": 0.06152722500264644,
|
|
"rewards/frontier_coverage_20": 0.06152722500264644,
|
|
"rewards/frontier_coverage_25": 0.06152722500264644,
|
|
"rewards/frontier_coverage_5": 0.06152722500264644,
|
|
"rewards/frontier_ece_reward": 0.011796734295785427,
|
|
"rewards/frontier_entropy_batch_reward": -0.41845354437828064,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2031087249517441,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21458333333333335,
|
|
"signal/accuracy_reward/group_std_mean": 0.2622006803750992,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.28333333432674407,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10155436247587205,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10155436247587205,
|
|
"signal/advantage_abs_mean": 0.15023626685142516,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15023626685142516,
|
|
"signal/advantage_pre_scale_std": 0.21854868531227112,
|
|
"signal/advantage_std": 0.21854868531227112,
|
|
"signal/brier_reward/centered_abs_mean": 0.16674597859382628,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8020833333333334,
|
|
"signal/brier_reward/group_std_mean": 0.21431083381175994,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016674598678946495,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016674598678946495,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038017303496599195,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7871527777777778,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0630945160984993,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038017303217202426,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038017303217202426,
|
|
"signal/format_reward/centered_abs_mean": 0.02126736082136631,
|
|
"signal/format_reward/group_bin_occupancy": 0.14895833333333333,
|
|
"signal/format_reward/group_std_mean": 0.043730095773935315,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333492279052,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010633680410683155,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010633680410683155,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013663872377946973,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.704861111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002126425364986062,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7079840290534776e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7079840290534776e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19963845312595369,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8038194444444444,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.27037686109542847,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19963845312595369,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8038194444444444,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27037686109542847,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19963845312595369,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8038194444444444,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27037686109542847,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19963845312595369,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8038194444444444,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27037686109542847,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19963845312595369,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8038194444444444,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27037686109542847,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19963845312595369,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8038194444444444,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27037686109542847,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19963845312595369,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8038194444444444,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27037686109542847,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.019963844493031502,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03020486868917942,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8215277777777776,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04409870654344559,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030204871203750373,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030204871203750373,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3919844150543213,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7600694444444444,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4584620654582977,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.039198441058397294,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.039198441058397294,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1940140969079807,
|
|
"calibration/batch_distribution_entropy": 0.8938569295594766,
|
|
"calibration/batch_entropy_100bins": 0.9033844517682443,
|
|
"calibration/batch_entropy_10bins": 0.8938569295594766,
|
|
"calibration/batch_entropy_50bins": 0.915012654661888,
|
|
"calibration/batch_uniqueness": 0.943383837453504,
|
|
"calibration/buffer_distribution_entropy": 0.7683503092367889,
|
|
"calibration/buffer_entropy_100bins": 0.7539086742860022,
|
|
"calibration/buffer_entropy_10bins": 0.7683503092367889,
|
|
"calibration/buffer_entropy_50bins": 0.793019939892889,
|
|
"calibration/confidence_entropy": 0.40771152928483917,
|
|
"calibration/coverage@0%": 0.016897250780865843,
|
|
"calibration/coverage@1%": 0.016897250780865843,
|
|
"calibration/coverage@10%": 0.3848949748426148,
|
|
"calibration/coverage@15%": 0.5550847520854665,
|
|
"calibration/coverage@20%": 0.6380853427409805,
|
|
"calibration/coverage@25%": 0.6896129223874049,
|
|
"calibration/coverage@30%": 0.7316552557390255,
|
|
"calibration/coverage@5%": 0.15476867787235124,
|
|
"calibration/ece": 0.1408602524415273,
|
|
"calibration/mean_confidence": 0.607529479335007,
|
|
"calibration/prompt_uniqueness": 0.8356702266069405,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009114583333333325,
|
|
"completions/max_length": 3560.6,
|
|
"completions/max_terminated_length": 3560.6,
|
|
"completions/mean_length": 672.5094848632813,
|
|
"completions/mean_terminated_length": 678.7419921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 157.8,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.0007051236461848021,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0121,
|
|
"num_tokens": 132360578.0,
|
|
"reward": 0.9930898666381835,
|
|
"reward_std": 0.20509625375270843,
|
|
"rewards/accuracy_reward": 0.6331597208976746,
|
|
"rewards/brier_reward": 0.7913673639297485,
|
|
"rewards/confidence_uniqueness_reward": 0.9348729729652405,
|
|
"rewards/format_reward": 0.9905381917953491,
|
|
"rewards/frontier_aurc_reward": -0.0016889730701223015,
|
|
"rewards/frontier_coverage_0": 0.07102360390126705,
|
|
"rewards/frontier_coverage_1": 0.07102360390126705,
|
|
"rewards/frontier_coverage_10": 0.07102360390126705,
|
|
"rewards/frontier_coverage_15": 0.07102360390126705,
|
|
"rewards/frontier_coverage_20": 0.07102360390126705,
|
|
"rewards/frontier_coverage_25": 0.07102360390126705,
|
|
"rewards/frontier_coverage_5": 0.07102360390126705,
|
|
"rewards/frontier_ece_reward": 0.013031562231481076,
|
|
"rewards/frontier_entropy_batch_reward": -0.4238172650337219,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18643662929534913,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21354166666666669,
|
|
"signal/accuracy_reward/group_std_mean": 0.24793701171875,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2916666716337204,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09321831464767456,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09321831464767456,
|
|
"signal/advantage_abs_mean": 0.15872004330158235,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15872004330158235,
|
|
"signal/advantage_pre_scale_std": 0.22737123370170592,
|
|
"signal/advantage_std": 0.22737123370170592,
|
|
"signal/brier_reward/centered_abs_mean": 0.17829251885414124,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/brier_reward/group_std_mean": 0.22675358057022094,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017829251661896705,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017829251661896705,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03666983284056187,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8017361111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0584432914853096,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036669834051281215,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036669834051281215,
|
|
"signal/format_reward/centered_abs_mean": 0.01703016497194767,
|
|
"signal/format_reward/group_bin_occupancy": 0.14479166666666668,
|
|
"signal/format_reward/group_std_mean": 0.035620180889964104,
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008515082485973834,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008515082485973834,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013369303196668625,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.696875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002153940638527274,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.671162899583578e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.671162899583578e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22538237869739533,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7586805555555556,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3036982655525208,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22538237869739533,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7586805555555556,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3036982655525208,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22538237869739533,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7586805555555556,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3036982655525208,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22538237869739533,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7586805555555556,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3036982655525208,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22538237869739533,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7586805555555556,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3036982655525208,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22538237869739533,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.7586805555555556,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3036982655525208,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22538237869739533,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7586805555555556,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3036982655525208,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.022538238018751145,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030462851002812385,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8322916666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0436931237578392,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030462852213531733,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030462852213531733,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3919891953468323,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7576388888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4605256378650665,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.039198920130729675,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.039198920130729675,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27194618970235607,
|
|
"calibration/batch_distribution_entropy": 0.8945873610755051,
|
|
"calibration/batch_entropy_100bins": 0.9131396367014031,
|
|
"calibration/batch_entropy_10bins": 0.8945873610755051,
|
|
"calibration/batch_entropy_50bins": 0.9192407527206786,
|
|
"calibration/batch_uniqueness": 0.9371901885401208,
|
|
"calibration/buffer_distribution_entropy": 0.7877129357086162,
|
|
"calibration/buffer_entropy_100bins": 0.78102191001134,
|
|
"calibration/buffer_entropy_10bins": 0.7877129357086162,
|
|
"calibration/buffer_entropy_50bins": 0.8151882911475585,
|
|
"calibration/confidence_entropy": 0.39241385084998825,
|
|
"calibration/coverage@0%": 0.05326375040260959,
|
|
"calibration/coverage@1%": 0.05639690967153911,
|
|
"calibration/coverage@10%": 0.12010448147310568,
|
|
"calibration/coverage@15%": 0.14781648540668707,
|
|
"calibration/coverage@20%": 0.25953707177758134,
|
|
"calibration/coverage@25%": 0.3467758145563071,
|
|
"calibration/coverage@30%": 0.69023526244398,
|
|
"calibration/coverage@5%": 0.11070500366631716,
|
|
"calibration/ece": 0.14855692656439354,
|
|
"calibration/mean_confidence": 0.592146170413358,
|
|
"calibration/prompt_uniqueness": 0.8288111311405574,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.007118055555555558,
|
|
"completions/max_length": 3017.8,
|
|
"completions/max_terminated_length": 3017.8,
|
|
"completions/mean_length": 649.9505249023438,
|
|
"completions/mean_terminated_length": 654.6026123046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 146.8,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.000531592988409102,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.0119,
|
|
"num_tokens": 142926152.0,
|
|
"reward": 0.9971241235733033,
|
|
"reward_std": 0.1946073591709137,
|
|
"rewards/accuracy_reward": 0.6058159708976746,
|
|
"rewards/brier_reward": 0.7913841605186462,
|
|
"rewards/confidence_uniqueness_reward": 0.9257380962371826,
|
|
"rewards/format_reward": 0.9927083492279053,
|
|
"rewards/frontier_aurc_reward": -0.0018725383095443248,
|
|
"rewards/frontier_coverage_0": 0.09908072724938392,
|
|
"rewards/frontier_coverage_1": 0.09908072724938392,
|
|
"rewards/frontier_coverage_10": 0.09908072724938392,
|
|
"rewards/frontier_coverage_15": 0.09908072724938392,
|
|
"rewards/frontier_coverage_20": 0.09908072724938392,
|
|
"rewards/frontier_coverage_25": 0.09908072724938392,
|
|
"rewards/frontier_coverage_5": 0.09908072724938392,
|
|
"rewards/frontier_ece_reward": 0.014820458181202412,
|
|
"rewards/frontier_entropy_batch_reward": -0.4466545760631561,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18914388120174408,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21423611111111113,
|
|
"signal/accuracy_reward/group_std_mean": 0.2506607919931412,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.28611111342906953,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09457194060087204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09457194060087204,
|
|
"signal/advantage_abs_mean": 0.14794844686985015,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14794844686985015,
|
|
"signal/advantage_pre_scale_std": 0.2173856317996979,
|
|
"signal/advantage_std": 0.2173856317996979,
|
|
"signal/brier_reward/centered_abs_mean": 0.1662675827741623,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7461805555555555,
|
|
"signal/brier_reward/group_std_mean": 0.21699636280536652,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01662675738334656,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01662675738334656,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03762320056557655,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8291666666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05675676092505455,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037623200565576552,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037623200565576552,
|
|
"signal/format_reward/centered_abs_mean": 0.01309678815305233,
|
|
"signal/format_reward/group_bin_occupancy": 0.14166666666666666,
|
|
"signal/format_reward/group_std_mean": 0.028634771704673767,
|
|
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006548394076526165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006548394076526165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015188657911494375,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6951388888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002341068908572197,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8985822680406273e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8985822680406273e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2185291677713394,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7368055555555557,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2952149331569672,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2185291677713394,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7368055555555557,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2952149331569672,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2185291677713394,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7368055555555557,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2952149331569672,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2185291677713394,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7368055555555557,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2952149331569672,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2185291677713394,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7368055555555557,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2952149331569672,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2185291677713394,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.7368055555555557,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2952149331569672,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2185291677713394,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7368055555555557,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2952149331569672,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.021852916106581688,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030513783916831017,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8253472222222221,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04309252202510834,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030513783451169727,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030513783451169727,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39379770755767823,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7395833333333333,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.46024208068847655,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03937977254390716,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03937977254390716,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19853769462627072,
|
|
"calibration/batch_distribution_entropy": 0.8961173951478291,
|
|
"calibration/batch_entropy_100bins": 0.912091298899328,
|
|
"calibration/batch_entropy_10bins": 0.8961173951478291,
|
|
"calibration/batch_entropy_50bins": 0.9179681022942207,
|
|
"calibration/batch_uniqueness": 0.9348791493897538,
|
|
"calibration/buffer_distribution_entropy": 0.8003268741179234,
|
|
"calibration/buffer_entropy_100bins": 0.80209686504773,
|
|
"calibration/buffer_entropy_10bins": 0.8003268741179234,
|
|
"calibration/buffer_entropy_50bins": 0.8315238709106142,
|
|
"calibration/confidence_entropy": 0.4322237036978451,
|
|
"calibration/coverage@0%": 0.04036413590679416,
|
|
"calibration/coverage@1%": 0.04036413590679416,
|
|
"calibration/coverage@10%": 0.376274421695374,
|
|
"calibration/coverage@15%": 0.4921271605087534,
|
|
"calibration/coverage@20%": 0.6069650945898191,
|
|
"calibration/coverage@25%": 0.6777106476805124,
|
|
"calibration/coverage@30%": 0.7231799896952824,
|
|
"calibration/coverage@5%": 0.22371359918982808,
|
|
"calibration/ece": 0.09717652729180778,
|
|
"calibration/mean_confidence": 0.6359411054528509,
|
|
"calibration/prompt_uniqueness": 0.8333672983024417,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.005295138888888884,
|
|
"completions/max_length": 3205.2,
|
|
"completions/max_terminated_length": 3205.2,
|
|
"completions/mean_length": 677.5071166992187,
|
|
"completions/mean_terminated_length": 681.2035278320312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.0005247980006970465,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0067,
|
|
"num_tokens": 153795930.0,
|
|
"reward": 1.0187444925308227,
|
|
"reward_std": 0.17705624103546141,
|
|
"rewards/accuracy_reward": 0.6374131917953492,
|
|
"rewards/brier_reward": 0.8208310961723327,
|
|
"rewards/confidence_uniqueness_reward": 0.929869544506073,
|
|
"rewards/format_reward": 0.9943576455116272,
|
|
"rewards/frontier_aurc_reward": -0.001502724504098296,
|
|
"rewards/frontier_coverage_0": 0.09719437658786774,
|
|
"rewards/frontier_coverage_1": 0.09719437658786774,
|
|
"rewards/frontier_coverage_10": 0.09719437658786774,
|
|
"rewards/frontier_coverage_15": 0.09719437658786774,
|
|
"rewards/frontier_coverage_20": 0.09719437658786774,
|
|
"rewards/frontier_coverage_25": 0.09719437658786774,
|
|
"rewards/frontier_coverage_5": 0.09719437658786774,
|
|
"rewards/frontier_ece_reward": 0.014616208896040917,
|
|
"rewards/frontier_entropy_batch_reward": -0.4168988406658173,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2002549946308136,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21215277777777777,
|
|
"signal/accuracy_reward/group_std_mean": 0.2571962982416153,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3027777850627899,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1001274973154068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1001274973154068,
|
|
"signal/advantage_abs_mean": 0.132818341255188,
|
|
"signal/advantage_pre_scale_abs_mean": 0.132818341255188,
|
|
"signal/advantage_pre_scale_std": 0.19871813356876372,
|
|
"signal/advantage_std": 0.19871813356876372,
|
|
"signal/brier_reward/centered_abs_mean": 0.14455785602331161,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7569444444444444,
|
|
"signal/brier_reward/group_std_mean": 0.1915825366973877,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014455785788595677,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014455785788595677,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03241968899965286,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8350694444444444,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05105073526501656,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003241968993097544,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003241968993097544,
|
|
"signal/format_reward/centered_abs_mean": 0.010378689225763082,
|
|
"signal/format_reward/group_bin_occupancy": 0.140625,
|
|
"signal/format_reward/group_std_mean": 0.02502230368554592,
|
|
"signal/format_reward/group_zero_std_frac": 0.875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005189344612881541,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005189344612881541,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012487402884289623,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7135416666666666,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0019265936687588691,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.56092533870833e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.56092533870833e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21457959413528443,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.29035847187042235,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21457959413528443,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29035847187042235,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21457959413528443,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29035847187042235,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21457959413528443,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29035847187042235,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21457959413528443,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29035847187042235,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21457959413528443,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29035847187042235,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21457959413528443,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29035847187042235,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.021457960084080695,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.028026602417230605,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7993055555555555,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.040372003614902494,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028026602696627377,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028026602696627377,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38128851652145385,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7371527777777778,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4472618103027344,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03812885135412216,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03812885135412216,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21202390893452958,
|
|
"calibration/batch_distribution_entropy": 0.892489770208589,
|
|
"calibration/batch_entropy_100bins": 0.9105575405217194,
|
|
"calibration/batch_entropy_10bins": 0.892489770208589,
|
|
"calibration/batch_entropy_50bins": 0.9111948565750734,
|
|
"calibration/batch_uniqueness": 0.9341327315004448,
|
|
"calibration/buffer_distribution_entropy": 0.8131000388841617,
|
|
"calibration/buffer_entropy_100bins": 0.8196942522115224,
|
|
"calibration/buffer_entropy_10bins": 0.8131000388841617,
|
|
"calibration/buffer_entropy_50bins": 0.8455434265915864,
|
|
"calibration/confidence_entropy": 0.42654255632151405,
|
|
"calibration/coverage@0%": 0.014085920925357768,
|
|
"calibration/coverage@1%": 0.014085920925357768,
|
|
"calibration/coverage@10%": 0.2734964364699554,
|
|
"calibration/coverage@15%": 0.32363363627234387,
|
|
"calibration/coverage@20%": 0.5826203423632129,
|
|
"calibration/coverage@25%": 0.7059773098189186,
|
|
"calibration/coverage@30%": 0.793550096136303,
|
|
"calibration/coverage@5%": 0.11101395617339956,
|
|
"calibration/ece": 0.13445512312765592,
|
|
"calibration/mean_confidence": 0.606925372178709,
|
|
"calibration/prompt_uniqueness": 0.8242422611106097,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.005034722222222232,
|
|
"completions/max_length": 3563.0,
|
|
"completions/max_terminated_length": 3563.0,
|
|
"completions/mean_length": 714.0626708984375,
|
|
"completions/mean_terminated_length": 717.676416015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 145.2,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.0004452217253856361,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0068,
|
|
"num_tokens": 165075212.0,
|
|
"reward": 1.0306865215301513,
|
|
"reward_std": 0.18099702000617982,
|
|
"rewards/accuracy_reward": 0.5858506977558136,
|
|
"rewards/brier_reward": 0.8206128120422364,
|
|
"rewards/confidence_uniqueness_reward": 0.9320234894752503,
|
|
"rewards/format_reward": 0.99453125,
|
|
"rewards/frontier_aurc_reward": -0.0016457670368254184,
|
|
"rewards/frontier_coverage_0": 0.14450157731771468,
|
|
"rewards/frontier_coverage_1": 0.14450157731771468,
|
|
"rewards/frontier_coverage_10": 0.14450157731771468,
|
|
"rewards/frontier_coverage_15": 0.14450157731771468,
|
|
"rewards/frontier_coverage_20": 0.14450157731771468,
|
|
"rewards/frontier_coverage_25": 0.14450157731771468,
|
|
"rewards/frontier_coverage_5": 0.14450157731771468,
|
|
"rewards/frontier_ece_reward": 0.012375526875257493,
|
|
"rewards/frontier_entropy_batch_reward": -0.37136178016662597,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.21248372495174409,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21597222222222223,
|
|
"signal/accuracy_reward/group_std_mean": 0.2697928935289383,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.272222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10624186247587204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10624186247587204,
|
|
"signal/advantage_abs_mean": 0.13688016831874847,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13688016831874847,
|
|
"signal/advantage_pre_scale_std": 0.19873642921447754,
|
|
"signal/advantage_std": 0.19873642921447754,
|
|
"signal/brier_reward/centered_abs_mean": 0.1460920125246048,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7590277777777777,
|
|
"signal/brier_reward/group_std_mean": 0.19507495164871216,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01460920162498951,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01460920162498951,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03345326967537403,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8486111111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04838352724909782,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003345327032729983,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003345327032729983,
|
|
"signal/format_reward/centered_abs_mean": 0.009836154337972403,
|
|
"signal/format_reward/group_bin_occupancy": 0.13541666666666669,
|
|
"signal/format_reward/group_std_mean": 0.019612624868750574,
|
|
"signal/format_reward/group_zero_std_frac": 0.9166666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004918077168986202,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004918077168986202,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013075984083116055,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.704861111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002080147247761488,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6344982032023837e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6344982032023837e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2356353372335434,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7520833333333333,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3123137831687927,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2356353372335434,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7520833333333333,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3123137831687927,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2356353372335434,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7520833333333333,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3123137831687927,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2356353372335434,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7520833333333333,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3123137831687927,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2356353372335434,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7520833333333333,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3123137831687927,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2356353372335434,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.7520833333333333,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3123137831687927,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2356353372335434,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7520833333333333,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3123137831687927,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.023563534021377563,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.025293727964162828,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.036943011730909345,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002529372926801443,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002529372926801443,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3852746546268463,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4537887334823608,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0385274663567543,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0385274663567543,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19887768920185828,
|
|
"calibration/batch_distribution_entropy": 0.8582793325801639,
|
|
"calibration/batch_entropy_100bins": 0.874851770121175,
|
|
"calibration/batch_entropy_10bins": 0.8582793325801639,
|
|
"calibration/batch_entropy_50bins": 0.8765912987534377,
|
|
"calibration/batch_uniqueness": 0.8977951813469284,
|
|
"calibration/buffer_distribution_entropy": 0.8264599677058859,
|
|
"calibration/buffer_entropy_100bins": 0.8342157208556948,
|
|
"calibration/buffer_entropy_10bins": 0.8264599677058859,
|
|
"calibration/buffer_entropy_50bins": 0.8574377668441269,
|
|
"calibration/confidence_entropy": 0.3511930719190495,
|
|
"calibration/coverage@0%": 0.04928223038710476,
|
|
"calibration/coverage@1%": 0.04928223038710476,
|
|
"calibration/coverage@10%": 0.3719955622775556,
|
|
"calibration/coverage@15%": 0.5484676920756779,
|
|
"calibration/coverage@20%": 0.6220975910365909,
|
|
"calibration/coverage@25%": 0.6842214398364115,
|
|
"calibration/coverage@30%": 0.7458859138684379,
|
|
"calibration/coverage@5%": 0.18465859511343805,
|
|
"calibration/ece": 0.08830501621552686,
|
|
"calibration/mean_confidence": 0.4861689044034094,
|
|
"calibration/prompt_uniqueness": 0.7554502920014838,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.003385416666666674,
|
|
"completions/max_length": 3001.6,
|
|
"completions/max_terminated_length": 3001.6,
|
|
"completions/mean_length": 723.7444458007812,
|
|
"completions/mean_terminated_length": 726.2084716796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 194.2,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.000452884822152555,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.0038,
|
|
"num_tokens": 176499948.0,
|
|
"reward": 1.0673803091049194,
|
|
"reward_std": 0.16003829836845399,
|
|
"rewards/accuracy_reward": 0.57890625,
|
|
"rewards/brier_reward": 0.8611330986022949,
|
|
"rewards/confidence_uniqueness_reward": 0.9070895791053772,
|
|
"rewards/format_reward": 0.9963541626930237,
|
|
"rewards/frontier_aurc_reward": -0.0014048191718757153,
|
|
"rewards/frontier_coverage_0": 0.20752938687801362,
|
|
"rewards/frontier_coverage_1": 0.20752938687801362,
|
|
"rewards/frontier_coverage_10": 0.20752938687801362,
|
|
"rewards/frontier_coverage_15": 0.20752938687801362,
|
|
"rewards/frontier_coverage_20": 0.20752938687801362,
|
|
"rewards/frontier_coverage_25": 0.20752938687801362,
|
|
"rewards/frontier_coverage_5": 0.20752938687801362,
|
|
"rewards/frontier_ece_reward": 0.016395201347768305,
|
|
"rewards/frontier_entropy_batch_reward": -0.4396472811698914,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1973470091819763,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21805555555555559,
|
|
"signal/accuracy_reward/group_std_mean": 0.2611843168735504,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2555555611848831,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09867350459098816,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09867350459098816,
|
|
"signal/advantage_abs_mean": 0.11756447702646255,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11756447702646255,
|
|
"signal/advantage_pre_scale_std": 0.1783807784318924,
|
|
"signal/advantage_std": 0.1783807784318924,
|
|
"signal/brier_reward/centered_abs_mean": 0.1257694497704506,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7149305555555556,
|
|
"signal/brier_reward/group_std_mean": 0.17371391355991364,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012576944567263127,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012576944567263127,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.053402946889400484,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7479166666666667,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07172206267714501,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005340294633060694,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005340294633060694,
|
|
"signal/format_reward/centered_abs_mean": 0.006532118155155331,
|
|
"signal/format_reward/group_bin_occupancy": 0.134375,
|
|
"signal/format_reward/group_std_mean": 0.015146102197468281,
|
|
"signal/format_reward/group_zero_std_frac": 0.925,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0032660590775776656,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0032660590775776656,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001343308249488473,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6805555555555556,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021895582554861902,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.679135348240379e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.679135348240379e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22067134380340575,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.6989583333333333,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2975525438785553,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22067134380340575,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.6989583333333333,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2975525438785553,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22067134380340575,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.6989583333333333,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2975525438785553,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22067134380340575,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.6989583333333333,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2975525438785553,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22067134380340575,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.6989583333333333,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2975525438785553,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22067134380340575,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.6989583333333333,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2975525438785553,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22067134380340575,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.6989583333333333,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2975525438785553,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.02206713445484638,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.026366091147065163,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7888888888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03810814470052719,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026366091333329678,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026366091333329678,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38300331830978396,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7260416666666667,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4546321153640747,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03830033168196678,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03830033168196678,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5399305014070592,
|
|
"calibration/batch_distribution_entropy": 0.49741838830536295,
|
|
"calibration/batch_entropy_100bins": 0.6470591660026435,
|
|
"calibration/batch_entropy_10bins": 0.49741838830536295,
|
|
"calibration/batch_entropy_50bins": 0.6181749096895081,
|
|
"calibration/batch_uniqueness": 0.5907532200896098,
|
|
"calibration/buffer_distribution_entropy": 0.8363355079750854,
|
|
"calibration/buffer_entropy_100bins": 0.8450362323859538,
|
|
"calibration/buffer_entropy_10bins": 0.8363355079750854,
|
|
"calibration/buffer_entropy_50bins": 0.8657301026926824,
|
|
"calibration/confidence_entropy": 0.20564557254488328,
|
|
"calibration/coverage@0%": 0.05212015712053617,
|
|
"calibration/coverage@1%": 0.10264099045386951,
|
|
"calibration/coverage@10%": 0.2891577985478643,
|
|
"calibration/coverage@15%": 0.3256461134468179,
|
|
"calibration/coverage@20%": 0.35065699247205717,
|
|
"calibration/coverage@25%": 0.376712279335677,
|
|
"calibration/coverage@30%": 0.3907883995326303,
|
|
"calibration/coverage@5%": 0.15422116886988343,
|
|
"calibration/ece": 0.05554478321511012,
|
|
"calibration/mean_confidence": 0.27574555197338846,
|
|
"calibration/prompt_uniqueness": 0.46331776524020124,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0015625000000000222,
|
|
"completions/max_length": 2791.0,
|
|
"completions/max_terminated_length": 2791.0,
|
|
"completions/mean_length": 710.73837890625,
|
|
"completions/mean_terminated_length": 711.8608520507812,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 190.8,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.00019236108346376568,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.0031,
|
|
"num_tokens": 187756326.0,
|
|
"reward": 1.1478140115737916,
|
|
"reward_std": 0.10408884063363075,
|
|
"rewards/accuracy_reward": 0.24027777976589276,
|
|
"rewards/brier_reward": 0.9400920748710633,
|
|
"rewards/confidence_uniqueness_reward": 0.58316290974617,
|
|
"rewards/format_reward": 0.9970486044883728,
|
|
"rewards/frontier_aurc_reward": -0.0016796960728242994,
|
|
"rewards/frontier_coverage_0": 0.6316808700561524,
|
|
"rewards/frontier_coverage_1": 0.6316808700561524,
|
|
"rewards/frontier_coverage_10": 0.6316808700561524,
|
|
"rewards/frontier_coverage_15": 0.6316808700561524,
|
|
"rewards/frontier_coverage_20": 0.6316808700561524,
|
|
"rewards/frontier_coverage_25": 0.6316808700561524,
|
|
"rewards/frontier_coverage_5": 0.6316808700561524,
|
|
"rewards/frontier_ece_reward": 0.014462851732969285,
|
|
"rewards/frontier_entropy_batch_reward": -0.6677661180496216,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12567274437751622,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1840277777777778,
|
|
"signal/accuracy_reward/group_std_mean": 0.16602826602756976,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5277777761220932,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06283637218875811,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06283637218875811,
|
|
"signal/advantage_abs_mean": 0.0688327431678772,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0688327431678772,
|
|
"signal/advantage_pre_scale_std": 0.12689779996871947,
|
|
"signal/advantage_std": 0.12689779996871947,
|
|
"signal/brier_reward/centered_abs_mean": 0.06333923451602459,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5947916666666667,
|
|
"signal/brier_reward/group_std_mean": 0.10030964910984039,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006333923456259072,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.006333923456259072,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2524725556373596,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.5340277777777779,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.28764230757951736,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025247253943234682,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025247253943234682,
|
|
"signal/format_reward/centered_abs_mean": 0.005674913222901523,
|
|
"signal/format_reward/group_bin_occupancy": 0.13541666666666666,
|
|
"signal/format_reward/group_std_mean": 0.015499813482165337,
|
|
"signal/format_reward/group_zero_std_frac": 0.9166666746139527,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0028374566114507615,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0028374566114507615,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0008010723817278631,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6520833333333333,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0013616787298815324,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.0013404175879258e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.0013404175879258e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1786520630121231,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.690625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.236984321475029,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1786520630121231,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.690625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.236984321475029,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1786520630121231,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.690625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.236984321475029,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1786520630121231,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.690625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.236984321475029,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1786520630121231,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.690625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.236984321475029,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1786520630121231,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.690625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.236984321475029,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1786520630121231,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.690625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.236984321475029,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.01786520555615425,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016731801349669694,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7232638888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02399433497339487,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016731801675632595,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016731801675632595,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31356381475925443,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.523611111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4009668231010437,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08333333171904087,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031356383487582205,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031356383487582205,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.03994664725036534,
|
|
"calibration/batch_entropy_100bins": 0.4099580796026087,
|
|
"calibration/batch_entropy_10bins": 0.03994664725036534,
|
|
"calibration/batch_entropy_50bins": 0.32650286573394777,
|
|
"calibration/batch_uniqueness": 0.272314453125,
|
|
"calibration/buffer_distribution_entropy": 0.8455020693172232,
|
|
"calibration/buffer_entropy_100bins": 0.848574391143036,
|
|
"calibration/buffer_entropy_10bins": 0.8455020693172232,
|
|
"calibration/buffer_entropy_50bins": 0.8680614704922407,
|
|
"calibration/confidence_entropy": 0.0955832500869643,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.024320078501806765,
|
|
"calibration/mean_confidence": 0.024320078501806765,
|
|
"calibration/prompt_uniqueness": 0.18600260416666667,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0006944444444444642,
|
|
"completions/max_length": 2236.4,
|
|
"completions/max_terminated_length": 2236.4,
|
|
"completions/mean_length": 714.6057373046875,
|
|
"completions/mean_terminated_length": 715.1013305664062,
|
|
"completions/min_length": 33.4,
|
|
"completions/min_terminated_length": 164.4,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 9.691954619484022e-05,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 199080264.0,
|
|
"reward": 1.1923882484436035,
|
|
"reward_std": 0.034006135910749434,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9971752405166626,
|
|
"rewards/confidence_uniqueness_reward": 0.21403581481426953,
|
|
"rewards/format_reward": 0.9986979126930237,
|
|
"rewards/frontier_aurc_reward": -0.0019425456179305912,
|
|
"rewards/frontier_coverage_0": 0.9546570420265198,
|
|
"rewards/frontier_coverage_1": 0.9546570420265198,
|
|
"rewards/frontier_coverage_10": 0.9546570420265198,
|
|
"rewards/frontier_coverage_15": 0.9546570420265198,
|
|
"rewards/frontier_coverage_20": 0.9546570420265198,
|
|
"rewards/frontier_coverage_25": 0.9546570420265198,
|
|
"rewards/frontier_coverage_5": 0.9546570420265198,
|
|
"rewards/frontier_ece_reward": 0.005286368634551763,
|
|
"rewards/frontier_entropy_batch_reward": -0.9684616804122925,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.02305524256080389,
|
|
"signal/advantage_pre_scale_abs_mean": 0.02305524256080389,
|
|
"signal/advantage_pre_scale_std": 0.048858624696731565,
|
|
"signal/advantage_std": 0.048858624696731565,
|
|
"signal/brier_reward/centered_abs_mean": 0.004363900749012828,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6322916666666666,
|
|
"signal/brier_reward/group_std_mean": 0.01022816812619567,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00043639009818434714,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00043639009818434714,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.36309358179569245,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.60625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.4173290342092514,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0363093588501215,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0363093588501215,
|
|
"signal/format_reward/centered_abs_mean": 0.002511935762595385,
|
|
"signal/format_reward/group_bin_occupancy": 0.12986111111111112,
|
|
"signal/format_reward/group_std_mean": 0.007066754624247551,
|
|
"signal/format_reward/group_zero_std_frac": 0.9611111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012559678812976926,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012559678812976926,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 3.0336726194946095e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8013888888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 4.735183538286947e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.792090922161151e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.792090922161151e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.04291480258107185,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.05929781645536423,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.04291480258107185,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.05929781645536423,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.04291480258107185,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.05929781645536423,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04291480258107185,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.05929781645536423,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04291480258107185,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05929781645536423,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04291480258107185,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.05929781645536423,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.04291480258107185,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.05929781645536423,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004291480267420411,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004431074485182762,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6114583333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006472232099622488,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004431074601598084,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004431074601598084,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05818961188197136,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.221875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.12989502102136613,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4555555522441864,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005818961281329393,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005818961281329393,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.03401586624223902,
|
|
"calibration/batch_entropy_100bins": 0.44183909396831,
|
|
"calibration/batch_entropy_10bins": 0.03401586624223902,
|
|
"calibration/batch_entropy_50bins": 0.3508888331700525,
|
|
"calibration/batch_uniqueness": 0.4576632450931252,
|
|
"calibration/buffer_distribution_entropy": 0.845870082499727,
|
|
"calibration/buffer_entropy_100bins": 0.8497800340010169,
|
|
"calibration/buffer_entropy_10bins": 0.845870082499727,
|
|
"calibration/buffer_entropy_50bins": 0.8667770466827367,
|
|
"calibration/confidence_entropy": 0.11965276406860055,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.029416174452166655,
|
|
"calibration/mean_confidence": 0.02941617445216666,
|
|
"calibration/prompt_uniqueness": 0.40579098606555386,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0016493055555555358,
|
|
"completions/max_length": 2943.8,
|
|
"completions/max_terminated_length": 2943.8,
|
|
"completions/mean_length": 714.0312377929688,
|
|
"completions/mean_terminated_length": 715.2037109375,
|
|
"completions/min_length": 29.0,
|
|
"completions/min_terminated_length": 176.4,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.00013115812907926738,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0021,
|
|
"num_tokens": 210404976.0,
|
|
"reward": 1.2067232370376586,
|
|
"reward_std": 0.032975076138973235,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9963070631027222,
|
|
"rewards/confidence_uniqueness_reward": 0.45519496202468873,
|
|
"rewards/format_reward": 0.9979166746139526,
|
|
"rewards/frontier_aurc_reward": -0.002177492156624794,
|
|
"rewards/frontier_coverage_0": 0.9418468713760376,
|
|
"rewards/frontier_coverage_1": 0.9418468713760376,
|
|
"rewards/frontier_coverage_10": 0.9418468713760376,
|
|
"rewards/frontier_coverage_15": 0.9418468713760376,
|
|
"rewards/frontier_coverage_20": 0.9418468713760376,
|
|
"rewards/frontier_coverage_25": 0.9418468713760376,
|
|
"rewards/frontier_coverage_5": 0.9418468713760376,
|
|
"rewards/frontier_ece_reward": 0.0026272932533174752,
|
|
"rewards/frontier_entropy_batch_reward": -0.9691365361213684,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.020837346091866493,
|
|
"signal/advantage_pre_scale_abs_mean": 0.020837346091866493,
|
|
"signal/advantage_pre_scale_std": 0.05691418126225471,
|
|
"signal/advantage_std": 0.05691418126225471,
|
|
"signal/brier_reward/centered_abs_mean": 0.005596226127818227,
|
|
"signal/brier_reward/group_bin_occupancy": 0.69375,
|
|
"signal/brier_reward/group_std_mean": 0.011906285118311644,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0005596226139459759,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0005596226139459759,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2057848244905472,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9458333333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.24700284898281097,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020578482002019883,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020578482002019883,
|
|
"signal/format_reward/centered_abs_mean": 0.003927951387595385,
|
|
"signal/format_reward/group_bin_occupancy": 0.1305555555555556,
|
|
"signal/format_reward/group_std_mean": 0.009288318641483783,
|
|
"signal/format_reward/group_zero_std_frac": 0.9555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019639756937976927,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0019639756937976927,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 3.2242565794149414e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8621527777777777,
|
|
"signal/frontier_aurc_reward/group_std_mean": 5.1109886408085e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0303208947989333e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0303208947989333e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.04101281464099884,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8722222222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.05649868324398995,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.04101281464099884,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8722222222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.05649868324398995,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.04101281464099884,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8722222222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.05649868324398995,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04101281464099884,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8722222222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.05649868324398995,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04101281464099884,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8722222222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05649868324398995,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04101281464099884,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8722222222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.05649868324398995,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.04101281464099884,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8722222222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.05649868324398995,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041012815199792385,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0019168400205671788,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5527777777777778,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003455847967416048,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00019168400613125413,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00019168400613125413,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05724479258060455,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.22361111111111112,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.13107622563838958,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.43888888955116273,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005724479351192713,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005724479351192713,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 1.0,
|
|
"eval_calibration/batch_distribution_entropy": 0.020131112472651442,
|
|
"eval_calibration/batch_entropy_100bins": 0.3400135574714165,
|
|
"eval_calibration/batch_entropy_10bins": 0.020131112472651442,
|
|
"eval_calibration/batch_entropy_50bins": 0.250985950605582,
|
|
"eval_calibration/batch_uniqueness": 0.2688802083333333,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8427708260785672,
|
|
"eval_calibration/buffer_entropy_100bins": 0.8483835963339752,
|
|
"eval_calibration/buffer_entropy_10bins": 0.8427708260785672,
|
|
"eval_calibration/buffer_entropy_50bins": 0.8634748665870949,
|
|
"eval_calibration/confidence_entropy": 0.08676234349291741,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0,
|
|
"eval_calibration/coverage@15%": 0.0,
|
|
"eval_calibration/coverage@20%": 0.0,
|
|
"eval_calibration/coverage@25%": 0.0,
|
|
"eval_calibration/coverage@30%": 0.0,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.02021172808892777,
|
|
"eval_calibration/mean_confidence": 0.02021172808892777,
|
|
"eval_calibration/prompt_uniqueness": 0.2688802083333333,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 1824.6666666666667,
|
|
"eval_completions/max_terminated_length": 1824.6666666666667,
|
|
"eval_completions/mean_length": 717.3307495117188,
|
|
"eval_completions/mean_terminated_length": 717.3307495117188,
|
|
"eval_completions/min_length": 241.0,
|
|
"eval_completions/min_terminated_length": 241.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 210404976.0,
|
|
"eval_reward": 1.1970368425051372,
|
|
"eval_reward_std": 0.022655950548748176,
|
|
"eval_rewards/accuracy_reward": 0.0,
|
|
"eval_rewards/brier_reward": 0.9990857243537903,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.2510850702722867,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0022718874970450997,
|
|
"eval_rewards/frontier_coverage_0": 0.9599077602227529,
|
|
"eval_rewards/frontier_coverage_1": 0.9599077602227529,
|
|
"eval_rewards/frontier_coverage_10": 0.9599077602227529,
|
|
"eval_rewards/frontier_coverage_15": 0.9599077602227529,
|
|
"eval_rewards/frontier_coverage_20": 0.9599077602227529,
|
|
"eval_rewards/frontier_coverage_25": 0.9599077602227529,
|
|
"eval_rewards/frontier_coverage_5": 0.9599077602227529,
|
|
"eval_rewards/frontier_ece_reward": 0.0011265517581099023,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 101.635,
|
|
"eval_samples_per_second": 9.839,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/advantage_abs_mean": 0.01836820226162672,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.01836820226162672,
|
|
"eval_signal/advantage_pre_scale_std": 0.022806229380269844,
|
|
"eval_signal/advantage_std": 0.022806229380269844,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.0011060868758553017,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.6493055555555555,
|
|
"eval_signal/brier_reward/group_std_mean": 0.0018368690895537536,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00011060868928325363,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.00011060868928325363,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.2825656458735466,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8611111111111112,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.34159958362579346,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028256564401090145,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.028256564401090145,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 1.9667225690985408e-05,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8472222222222223,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 2.66848749864342e-05,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.45840328242745e-07,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.45840328242745e-07,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.03097957745194435,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.84375,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.041869492580493294,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.03097957745194435,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.84375,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.041869492580493294,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.03097957745194435,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.84375,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.041869492580493294,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.03097957745194435,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.84375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.041869492580493294,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.03097957745194435,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.84375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.041869492580493294,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.03097957745194435,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.84375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.041869492580493294,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.03097957745194435,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.84375,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.041869492580493294,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030979577374334135,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0009274356222401062,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.4930555555555555,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0017319663796418656,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 9.274356186021275e-05,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 9.274356186021275e-05,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.059,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.012341952991260587,
|
|
"calibration/batch_entropy_100bins": 0.37614392423453313,
|
|
"calibration/batch_entropy_10bins": 0.012341952991260587,
|
|
"calibration/batch_entropy_50bins": 0.27599843753881276,
|
|
"calibration/batch_uniqueness": 0.33745739403507496,
|
|
"calibration/buffer_distribution_entropy": 0.8390653652277222,
|
|
"calibration/buffer_entropy_100bins": 0.8455699458385212,
|
|
"calibration/buffer_entropy_10bins": 0.8390653652277222,
|
|
"calibration/buffer_entropy_50bins": 0.8593265890928249,
|
|
"calibration/confidence_entropy": 0.08470434211228765,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.01988270931589781,
|
|
"calibration/mean_confidence": 0.019882709315897806,
|
|
"calibration/prompt_uniqueness": 0.26092733805931323,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0007812500000000222,
|
|
"completions/max_length": 2558.6,
|
|
"completions/max_terminated_length": 2558.6,
|
|
"completions/mean_length": 702.9444580078125,
|
|
"completions/mean_terminated_length": 703.4955688476563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.8,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 9.564626816427335e-05,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 221579760.0,
|
|
"reward": 1.2081443309783935,
|
|
"reward_std": 0.023845688626170158,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9981184363365173,
|
|
"rewards/confidence_uniqueness_reward": 0.3303511440753937,
|
|
"rewards/format_reward": 0.9990451335906982,
|
|
"rewards/frontier_aurc_reward": -0.0024016556330025197,
|
|
"rewards/frontier_coverage_0": 0.9605079174041748,
|
|
"rewards/frontier_coverage_1": 0.9605079174041748,
|
|
"rewards/frontier_coverage_10": 0.9605079174041748,
|
|
"rewards/frontier_coverage_15": 0.9605079174041748,
|
|
"rewards/frontier_coverage_20": 0.9605079174041748,
|
|
"rewards/frontier_coverage_25": 0.9605079174041748,
|
|
"rewards/frontier_coverage_5": 0.9605079174041748,
|
|
"rewards/frontier_ece_reward": 0.0008044078014791012,
|
|
"rewards/frontier_entropy_batch_reward": -0.9663121461868286,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.014495623297989368,
|
|
"signal/advantage_pre_scale_abs_mean": 0.014495623297989368,
|
|
"signal/advantage_pre_scale_std": 0.04096686318516731,
|
|
"signal/advantage_std": 0.04096686318516731,
|
|
"signal/brier_reward/centered_abs_mean": 0.00292236702516675,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6513888888888889,
|
|
"signal/brier_reward/group_std_mean": 0.006811278499662876,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0002922367071732879,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0002922367071732879,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.25255054533481597,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8784722222222221,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3084090232849121,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025255054607987402,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025255054607987402,
|
|
"signal/format_reward/centered_abs_mean": 0.0018391926656477152,
|
|
"signal/format_reward/group_bin_occupancy": 0.12847222222222224,
|
|
"signal/format_reward/group_std_mean": 0.005102569004520774,
|
|
"signal/format_reward/group_zero_std_frac": 0.9722222089767456,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009195963328238576,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0009195963328238576,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 2.5563345479895362e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8350694444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 4.01370954932645e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1954182304616553e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1954182304616553e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.03394378535449505,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8385416666666667,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.046593782305717465,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.03394378535449505,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8385416666666667,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.046593782305717465,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.03394378535449505,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8385416666666667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.046593782305717465,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.03394378535449505,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8385416666666667,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.046593782305717465,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03394378535449505,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8385416666666667,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.046593782305717465,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.03394378535449505,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8385416666666667,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.046593782305717465,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.03394378535449505,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8385416666666667,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.046593782305717465,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003394378535449505,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0007472379831597209,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.49756944444444445,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.001472054049372673,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 7.472379875252954e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 7.472379875252954e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0619256779551506,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2510416666666667,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1331087276339531,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3027777820825577,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0061925679445266725,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0061925679445266725,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.021284326771816582,
|
|
"calibration/batch_entropy_100bins": 0.3951721251871505,
|
|
"calibration/batch_entropy_10bins": 0.021284326771816582,
|
|
"calibration/batch_entropy_50bins": 0.30880453250559664,
|
|
"calibration/batch_uniqueness": 0.4211642795138889,
|
|
"calibration/buffer_distribution_entropy": 0.829256019029493,
|
|
"calibration/buffer_entropy_100bins": 0.8388420472420786,
|
|
"calibration/buffer_entropy_10bins": 0.829256019029493,
|
|
"calibration/buffer_entropy_50bins": 0.8499276619128068,
|
|
"calibration/confidence_entropy": 0.08871296935789795,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.022094368099568412,
|
|
"calibration/mean_confidence": 0.022094368099568412,
|
|
"calibration/prompt_uniqueness": 0.3543619791666667,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0004340277777777901,
|
|
"completions/max_length": 2544.4,
|
|
"completions/max_terminated_length": 2544.4,
|
|
"completions/mean_length": 696.4470581054687,
|
|
"completions/mean_terminated_length": 696.752587890625,
|
|
"completions/min_length": 112.6,
|
|
"completions/min_terminated_length": 184.8,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.00010304038733011112,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 232711278.0,
|
|
"reward": 1.2140116214752197,
|
|
"reward_std": 0.020644556544721127,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9981519222259522,
|
|
"rewards/confidence_uniqueness_reward": 0.42851542234420775,
|
|
"rewards/format_reward": 0.9993923664093017,
|
|
"rewards/frontier_aurc_reward": -0.002601869171485305,
|
|
"rewards/frontier_coverage_0": 0.9562180042266846,
|
|
"rewards/frontier_coverage_1": 0.9562180042266846,
|
|
"rewards/frontier_coverage_10": 0.9562180042266846,
|
|
"rewards/frontier_coverage_15": 0.9562180042266846,
|
|
"rewards/frontier_coverage_20": 0.9562180042266846,
|
|
"rewards/frontier_coverage_25": 0.9562180042266846,
|
|
"rewards/frontier_coverage_5": 0.9562180042266846,
|
|
"rewards/frontier_ece_reward": 0.0006137272284831852,
|
|
"rewards/frontier_entropy_batch_reward": -0.9773278951644897,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.012931106984615326,
|
|
"signal/advantage_pre_scale_abs_mean": 0.012931106984615326,
|
|
"signal/advantage_pre_scale_std": 0.03138108551502228,
|
|
"signal/advantage_std": 0.03138108551502228,
|
|
"signal/brier_reward/centered_abs_mean": 0.0026462335139513017,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6840277777777778,
|
|
"signal/brier_reward/group_std_mean": 0.005694417608901858,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00026462336245458573,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00026462336245458573,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2552150249481201,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7388888888888888,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2904451459646225,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025521503388881685,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025521503388881685,
|
|
"signal/format_reward/centered_abs_mean": 0.0011773003148846327,
|
|
"signal/format_reward/group_bin_occupancy": 0.12743055555555557,
|
|
"signal/format_reward/group_std_mean": 0.0034373244270682335,
|
|
"signal/format_reward/group_zero_std_frac": 0.9805555462837219,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005886501574423164,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0005886501574423164,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 2.938565012300387e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8614583333333332,
|
|
"signal/frontier_aurc_reward/group_std_mean": 4.244408264639787e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6732062653754837e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6732062653754837e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.04040106013417244,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8690972222222223,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.05285699814558029,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.04040106013417244,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8690972222222223,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.05285699814558029,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.04040106013417244,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8690972222222223,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.05285699814558029,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04040106013417244,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8690972222222223,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.05285699814558029,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04040106013417244,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8690972222222223,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05285699814558029,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04040106013417244,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8690972222222223,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.05285699814558029,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.04040106013417244,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8690972222222223,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.05285699814558029,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004040106106549502,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0007259678619448096,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.471875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.001656959392130375,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 7.259679259732365e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 7.259679259732365e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.042409443855285646,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.21388888888888888,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10218364298343659,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.46111112236976626,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0042409445624798535,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0042409445624798535,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.01673821934029086,
|
|
"calibration/batch_entropy_100bins": 0.38926544017604686,
|
|
"calibration/batch_entropy_10bins": 0.01673821934029086,
|
|
"calibration/batch_entropy_50bins": 0.2993580485835123,
|
|
"calibration/batch_uniqueness": 0.4159125434027778,
|
|
"calibration/buffer_distribution_entropy": 0.8175787124013256,
|
|
"calibration/buffer_entropy_100bins": 0.8315024676718213,
|
|
"calibration/buffer_entropy_10bins": 0.8175787124013256,
|
|
"calibration/buffer_entropy_50bins": 0.8402204734165609,
|
|
"calibration/confidence_entropy": 0.08572288929480461,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.021108248916521234,
|
|
"calibration/mean_confidence": 0.02110824891652123,
|
|
"calibration/prompt_uniqueness": 0.3557942708333333,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00017361111111111605,
|
|
"completions/max_length": 2377.6,
|
|
"completions/max_terminated_length": 2377.6,
|
|
"completions/mean_length": 671.6215209960938,
|
|
"completions/mean_terminated_length": 671.7394165039062,
|
|
"completions/min_length": 89.6,
|
|
"completions/min_terminated_length": 164.0,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 5.828886787639931e-05,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 243527558.0,
|
|
"reward": 1.2144116401672362,
|
|
"reward_std": 0.01996447965502739,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9981747031211853,
|
|
"rewards/confidence_uniqueness_reward": 0.4300437688827515,
|
|
"rewards/format_reward": 0.9993923544883728,
|
|
"rewards/frontier_aurc_reward": -0.002783898450434208,
|
|
"rewards/frontier_coverage_0": 0.9569729804992676,
|
|
"rewards/frontier_coverage_1": 0.9569729804992676,
|
|
"rewards/frontier_coverage_10": 0.9569729804992676,
|
|
"rewards/frontier_coverage_15": 0.9569729804992676,
|
|
"rewards/frontier_coverage_20": 0.9569729804992676,
|
|
"rewards/frontier_coverage_25": 0.9569729804992676,
|
|
"rewards/frontier_coverage_5": 0.9569729804992676,
|
|
"rewards/frontier_ece_reward": 0.0003723478992469609,
|
|
"rewards/frontier_entropy_batch_reward": -0.9798990249633789,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.012076887860894203,
|
|
"signal/advantage_pre_scale_abs_mean": 0.012076887860894203,
|
|
"signal/advantage_pre_scale_std": 0.03225056380033493,
|
|
"signal/advantage_std": 0.03225056380033493,
|
|
"signal/brier_reward/centered_abs_mean": 0.00263253313023597,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6972222222222222,
|
|
"signal/brier_reward/group_std_mean": 0.005726341786794364,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0002632533200085163,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0002632533200085163,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2511452704668045,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7500000000000001,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2867334961891174,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025114526599645616,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025114526599645616,
|
|
"signal/format_reward/centered_abs_mean": 0.001177300326526165,
|
|
"signal/format_reward/group_bin_occupancy": 0.12743055555555555,
|
|
"signal/format_reward/group_std_mean": 0.0034373244270682335,
|
|
"signal/format_reward/group_zero_std_frac": 0.9805555462837219,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005886501632630825,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0005886501632630825,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 2.965504681924358e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8607638888888888,
|
|
"signal/frontier_aurc_reward/group_std_mean": 4.3591349822236225e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.706880761455977e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.706880761455977e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.03983094990253448,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8743055555555556,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.052179882675409316,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.03983094990253448,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8743055555555556,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.052179882675409316,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.03983094990253448,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8743055555555556,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.052179882675409316,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.03983094990253448,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8743055555555556,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.052179882675409316,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03983094990253448,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8743055555555556,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.052179882675409316,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.03983094990253448,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8743055555555556,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.052179882675409316,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.03983094990253448,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8743055555555556,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.052179882675409316,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003983095102012157,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0004976392199750989,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.478125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0012285706703551113,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 4.9763925198931246e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 4.9763925198931246e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.03792983740568161,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.19652777777777777,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09351965636014939,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5333333432674408,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.003792983898892999,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.003792983898892999,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.015287037913899185,
|
|
"calibration/batch_entropy_100bins": 0.3848550389267389,
|
|
"calibration/batch_entropy_10bins": 0.015287037913899185,
|
|
"calibration/batch_entropy_50bins": 0.2882901972717423,
|
|
"calibration/batch_uniqueness": 0.425244140625,
|
|
"calibration/buffer_distribution_entropy": 0.8046537012281669,
|
|
"calibration/buffer_entropy_100bins": 0.8234345031545167,
|
|
"calibration/buffer_entropy_10bins": 0.8046537012281669,
|
|
"calibration/buffer_entropy_50bins": 0.8296096064776775,
|
|
"calibration/confidence_entropy": 0.08368989254666424,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.020141531457395022,
|
|
"calibration/mean_confidence": 0.020141531457395025,
|
|
"calibration/prompt_uniqueness": 0.3623046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 2541.8,
|
|
"completions/max_terminated_length": 2541.8,
|
|
"completions/mean_length": 682.83447265625,
|
|
"completions/mean_terminated_length": 683.363427734375,
|
|
"completions/min_length": 59.2,
|
|
"completions/min_terminated_length": 159.4,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 5.349236380425282e-05,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 254475667.0,
|
|
"reward": 1.217322826385498,
|
|
"reward_std": 0.018410124257206915,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9980918765068054,
|
|
"rewards/confidence_uniqueness_reward": 0.43044655919075014,
|
|
"rewards/format_reward": 0.9991319298744201,
|
|
"rewards/frontier_aurc_reward": -0.002950493525713682,
|
|
"rewards/frontier_coverage_0": 0.959316098690033,
|
|
"rewards/frontier_coverage_1": 0.959316098690033,
|
|
"rewards/frontier_coverage_10": 0.959316098690033,
|
|
"rewards/frontier_coverage_15": 0.959316098690033,
|
|
"rewards/frontier_coverage_20": 0.959316098690033,
|
|
"rewards/frontier_coverage_25": 0.959316098690033,
|
|
"rewards/frontier_coverage_5": 0.959316098690033,
|
|
"rewards/frontier_ece_reward": 0.0002875441190553829,
|
|
"rewards/frontier_entropy_batch_reward": -0.9661013126373291,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.01139154490083456,
|
|
"signal/advantage_pre_scale_abs_mean": 0.01139154490083456,
|
|
"signal/advantage_pre_scale_std": 0.03482802901417017,
|
|
"signal/advantage_std": 0.03482802901417017,
|
|
"signal/brier_reward/centered_abs_mean": 0.00280342239420861,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6600694444444445,
|
|
"signal/brier_reward/group_std_mean": 0.005472193798050284,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00028034225688315927,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00028034225688315927,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21315207481384277,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2526503801345825,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021315207704901695,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021315207704901695,
|
|
"signal/format_reward/centered_abs_mean": 0.0015733506879769266,
|
|
"signal/format_reward/group_bin_occupancy": 0.12708333333333333,
|
|
"signal/format_reward/group_std_mean": 0.0034799596294760706,
|
|
"signal/format_reward/group_zero_std_frac": 0.9833333253860473,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007866753439884633,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007866753439884633,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 2.8699574977508745e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.829861111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 4.1820811020443216e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.587446883557277e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.587446883557277e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.0366835243999958,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.04874376505613327,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0366835243999958,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.04874376505613327,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0366835243999958,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.04874376505613327,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0366835243999958,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.04874376505613327,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0366835243999958,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04874376505613327,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0366835243999958,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04874376505613327,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0366835243999958,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.04874376505613327,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036683525424450634,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00045878663659095766,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4222222222222222,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0012177627184428274,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 4.587866424117237e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 4.587866424117237e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06285881474614144,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25069444444444444,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.14192153960466386,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3166666716337204,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00628588180989027,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00628588180989027,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.022472576476576352,
|
|
"calibration/batch_entropy_100bins": 0.38102577658556147,
|
|
"calibration/batch_entropy_10bins": 0.022472576476576352,
|
|
"calibration/batch_entropy_50bins": 0.28351199674802774,
|
|
"calibration/batch_uniqueness": 0.4171188269475567,
|
|
"calibration/buffer_distribution_entropy": 0.791119751195128,
|
|
"calibration/buffer_entropy_100bins": 0.8150867335752293,
|
|
"calibration/buffer_entropy_10bins": 0.791119751195128,
|
|
"calibration/buffer_entropy_50bins": 0.8186152341744503,
|
|
"calibration/confidence_entropy": 0.08195105191568414,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.019687573368622573,
|
|
"calibration/mean_confidence": 0.01968757336862257,
|
|
"calibration/prompt_uniqueness": 0.34676777180406215,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0007812500000000222,
|
|
"completions/max_length": 2195.6,
|
|
"completions/max_terminated_length": 2195.6,
|
|
"completions/mean_length": 685.1104248046875,
|
|
"completions/mean_terminated_length": 685.647119140625,
|
|
"completions/min_length": 42.6,
|
|
"completions/min_terminated_length": 190.8,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 7.856736920075491e-05,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 265485803.0,
|
|
"reward": 1.2181265592575072,
|
|
"reward_std": 0.01852573864161968,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9982476234436035,
|
|
"rewards/confidence_uniqueness_reward": 0.42563745379447937,
|
|
"rewards/format_reward": 0.999218738079071,
|
|
"rewards/frontier_aurc_reward": -0.003106745798140764,
|
|
"rewards/frontier_coverage_0": 0.960704791545868,
|
|
"rewards/frontier_coverage_1": 0.960704791545868,
|
|
"rewards/frontier_coverage_10": 0.960704791545868,
|
|
"rewards/frontier_coverage_15": 0.960704791545868,
|
|
"rewards/frontier_coverage_20": 0.960704791545868,
|
|
"rewards/frontier_coverage_25": 0.960704791545868,
|
|
"rewards/frontier_coverage_5": 0.960704791545868,
|
|
"rewards/frontier_ece_reward": 0.00022829854860901834,
|
|
"rewards/frontier_entropy_batch_reward": -0.9634865045547485,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.011229231022298336,
|
|
"signal/advantage_pre_scale_abs_mean": 0.011229231022298336,
|
|
"signal/advantage_pre_scale_std": 0.03441160153597593,
|
|
"signal/advantage_std": 0.03441160153597593,
|
|
"signal/brier_reward/centered_abs_mean": 0.0026414696127176284,
|
|
"signal/brier_reward/group_bin_occupancy": 0.640625,
|
|
"signal/brier_reward/group_std_mean": 0.0057011493248865005,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00026414696621941404,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00026414696621941404,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19697971045970916,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23735004365444184,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019697971269488336,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019697971269488336,
|
|
"signal/format_reward/centered_abs_mean": 0.0014919704641215502,
|
|
"signal/format_reward/group_bin_occupancy": 0.12743055555555555,
|
|
"signal/format_reward/group_std_mean": 0.003821535501629114,
|
|
"signal/format_reward/group_zero_std_frac": 0.9805555462837219,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007459852320607751,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007459852320607751,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 2.7659153784043155e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8177083333333333,
|
|
"signal/frontier_aurc_reward/group_std_mean": 4.221230701659806e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4573943707982835e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4573943707982835e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.03492500931024552,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.835763888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.04719159454107284,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.03492500931024552,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.835763888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.04719159454107284,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.03492500931024552,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.835763888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.04719159454107284,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.03492500931024552,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.835763888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.04719159454107284,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03492500931024552,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.835763888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04719159454107284,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.03492500931024552,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.835763888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04719159454107284,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.03492500931024552,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.835763888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.04719159454107284,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034925009589642287,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00042187916114926337,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.45069444444444445,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0011129227350465953,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 4.2187915096292275e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 4.2187915096292275e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06738204509019852,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.253125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.14837366938591004,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3000000059604645,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.006738204788416624,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.006738204788416624,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.029921545629725488,
|
|
"calibration/batch_entropy_100bins": 0.3938974837071955,
|
|
"calibration/batch_entropy_10bins": 0.029921545629725488,
|
|
"calibration/batch_entropy_50bins": 0.2948700326136235,
|
|
"calibration/batch_uniqueness": 0.4545985713361545,
|
|
"calibration/buffer_distribution_entropy": 0.7775809153332081,
|
|
"calibration/buffer_entropy_100bins": 0.8068847106686515,
|
|
"calibration/buffer_entropy_10bins": 0.7775809153332081,
|
|
"calibration/buffer_entropy_50bins": 0.8077963576382909,
|
|
"calibration/confidence_entropy": 0.08758419464326617,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.021199454071341377,
|
|
"calibration/mean_confidence": 0.02119945407134138,
|
|
"calibration/prompt_uniqueness": 0.4083991149627124,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0004340277777777901,
|
|
"completions/max_length": 2295.8,
|
|
"completions/max_terminated_length": 2295.8,
|
|
"completions/mean_length": 692.5526977539063,
|
|
"completions/mean_terminated_length": 692.8485107421875,
|
|
"completions/min_length": 28.6,
|
|
"completions/min_terminated_length": 171.6,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 4.43594872194808e-05,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.0005,
|
|
"num_tokens": 276588810.0,
|
|
"reward": 1.219011116027832,
|
|
"reward_std": 0.016876889020204545,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9985480904579163,
|
|
"rewards/confidence_uniqueness_reward": 0.42221215963363645,
|
|
"rewards/format_reward": 0.9995659589767456,
|
|
"rewards/frontier_aurc_reward": -0.003253296110779047,
|
|
"rewards/frontier_coverage_0": 0.9612909436225892,
|
|
"rewards/frontier_coverage_1": 0.9612909436225892,
|
|
"rewards/frontier_coverage_10": 0.9612909436225892,
|
|
"rewards/frontier_coverage_15": 0.9612909436225892,
|
|
"rewards/frontier_coverage_20": 0.9612909436225892,
|
|
"rewards/frontier_coverage_25": 0.9612909436225892,
|
|
"rewards/frontier_coverage_5": 0.9612909436225892,
|
|
"rewards/frontier_ece_reward": 0.0002832911792211235,
|
|
"rewards/frontier_entropy_batch_reward": -0.9573925971984864,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.010039843246340751,
|
|
"signal/advantage_pre_scale_abs_mean": 0.010039843246340751,
|
|
"signal/advantage_pre_scale_std": 0.028154394030570982,
|
|
"signal/advantage_std": 0.028154394030570982,
|
|
"signal/brier_reward/centered_abs_mean": 0.002099990099668503,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6159722222222221,
|
|
"signal/brier_reward/group_std_mean": 0.00470021041110158,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0002099990117130801,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0002099990117130801,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1907490074634552,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8645833333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23286633491516112,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019074901193380355,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019074901193380355,
|
|
"signal/format_reward/centered_abs_mean": 0.0008409288129769266,
|
|
"signal/format_reward/group_bin_occupancy": 0.1267361111111111,
|
|
"signal/format_reward/group_std_mean": 0.0024552317336201668,
|
|
"signal/format_reward/group_zero_std_frac": 0.9861111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004204644064884633,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004204644064884633,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 2.587656126706861e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8006944444444445,
|
|
"signal/frontier_aurc_reward/group_std_mean": 3.971766345784999e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2345701015401576e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2345701015401576e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.034396450221538546,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.04737475067377091,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.034396450221538546,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.04737475067377091,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.034396450221538546,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.04737475067377091,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.034396450221538546,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.04737475067377091,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.034396450221538546,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04737475067377091,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.034396450221538546,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04737475067377091,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.034396450221538546,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.04737475067377091,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034396452363580467,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0005845244158990681,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.45833333333333337,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.001524832844734192,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 5.845244086231105e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 5.845244086231105e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07861145734786987,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2697916666666667,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1693776398897171,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.24999999701976777,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.007861145678907633,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.007861145678907633,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.033724537578294043,
|
|
"calibration/batch_entropy_100bins": 0.3790440291598166,
|
|
"calibration/batch_entropy_10bins": 0.033724537578294043,
|
|
"calibration/batch_entropy_50bins": 0.27996729733610637,
|
|
"calibration/batch_uniqueness": 0.42015942923298233,
|
|
"calibration/buffer_distribution_entropy": 0.7640241372764862,
|
|
"calibration/buffer_entropy_100bins": 0.7985068573608476,
|
|
"calibration/buffer_entropy_10bins": 0.7640241372764862,
|
|
"calibration/buffer_entropy_50bins": 0.796776148875944,
|
|
"calibration/confidence_entropy": 0.08183363627321727,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.019873024668607783,
|
|
"calibration/mean_confidence": 0.019873024668607787,
|
|
"calibration/prompt_uniqueness": 0.3658195672910164,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0013020833333333482,
|
|
"completions/max_length": 3082.2,
|
|
"completions/max_terminated_length": 3082.2,
|
|
"completions/mean_length": 669.673876953125,
|
|
"completions/mean_terminated_length": 670.5369018554687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.4,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 5.57123712496832e-05,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.0017,
|
|
"num_tokens": 287396477.0,
|
|
"reward": 1.2182500600814818,
|
|
"reward_std": 0.019175108522176743,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9975671648979187,
|
|
"rewards/confidence_uniqueness_reward": 0.43408032059669494,
|
|
"rewards/format_reward": 0.9986979246139527,
|
|
"rewards/frontier_aurc_reward": -0.003387561673298478,
|
|
"rewards/frontier_coverage_0": 0.9586145401000976,
|
|
"rewards/frontier_coverage_1": 0.9586145401000976,
|
|
"rewards/frontier_coverage_10": 0.9586145401000976,
|
|
"rewards/frontier_coverage_15": 0.9586145401000976,
|
|
"rewards/frontier_coverage_20": 0.9586145401000976,
|
|
"rewards/frontier_coverage_25": 0.9586145401000976,
|
|
"rewards/frontier_coverage_5": 0.9586145401000976,
|
|
"rewards/frontier_ece_reward": 0.00035531093017198144,
|
|
"rewards/frontier_entropy_batch_reward": -0.9528710126876831,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.011726399697363377,
|
|
"signal/advantage_pre_scale_abs_mean": 0.011726399697363377,
|
|
"signal/advantage_pre_scale_std": 0.04185779392719269,
|
|
"signal/advantage_std": 0.04185779392719269,
|
|
"signal/brier_reward/centered_abs_mean": 0.003680155472829938,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6277777777777778,
|
|
"signal/brier_reward/group_std_mean": 0.006919549405574798,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003680155467009172,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0003680155467009172,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20299766063690186,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8690972222222222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.24341756701469422,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020299766212701797,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020299766212701797,
|
|
"signal/format_reward/centered_abs_mean": 0.002273220452480018,
|
|
"signal/format_reward/group_bin_occupancy": 0.12743055555555555,
|
|
"signal/format_reward/group_std_mean": 0.004497193172574044,
|
|
"signal/format_reward/group_zero_std_frac": 0.9805555462837219,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001136610226240009,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001136610226240009,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 3.237565179006197e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/frontier_aurc_reward/group_std_mean": 4.882146386080422e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0469563487022244e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0469563487022244e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.03778303116559982,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8256944444444443,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.05185385718941689,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.03778303116559982,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8256944444444443,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.05185385718941689,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.03778303116559982,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8256944444444443,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.05185385718941689,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.03778303116559982,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8256944444444443,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.05185385718941689,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03778303116559982,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8256944444444443,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05185385718941689,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.03778303116559982,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8256944444444443,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.05185385718941689,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.03778303116559982,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8256944444444443,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.05185385718941689,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003778303088620305,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0007637530216015875,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.44340277777777776,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.001957472856156528,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 7.637530070496723e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 7.637530070496723e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08594719767570495,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2774305555555555,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.17783950865268708,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.24166666865348815,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.008594720251858234,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.008594720251858234,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.030543712678111984,
|
|
"calibration/batch_entropy_100bins": 0.3845846721133629,
|
|
"calibration/batch_entropy_10bins": 0.030543712678111984,
|
|
"calibration/batch_entropy_50bins": 0.2859978978931486,
|
|
"calibration/batch_uniqueness": 0.4320583767361111,
|
|
"calibration/buffer_distribution_entropy": 0.7508008699338516,
|
|
"calibration/buffer_entropy_100bins": 0.7925527781072645,
|
|
"calibration/buffer_entropy_10bins": 0.7508008699338516,
|
|
"calibration/buffer_entropy_50bins": 0.7866199361280733,
|
|
"calibration/confidence_entropy": 0.08360223614543279,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.020143632359686154,
|
|
"calibration/mean_confidence": 0.020143632359686154,
|
|
"calibration/prompt_uniqueness": 0.3770182291666667,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0004340277777777901,
|
|
"completions/max_length": 3079.4,
|
|
"completions/max_terminated_length": 3079.4,
|
|
"completions/mean_length": 677.6828979492187,
|
|
"completions/mean_terminated_length": 677.9762817382813,
|
|
"completions/min_length": 33.4,
|
|
"completions/min_terminated_length": 176.2,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 5.097528628539294e-05,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0005,
|
|
"num_tokens": 298307608.0,
|
|
"reward": 1.219898509979248,
|
|
"reward_std": 0.016702464036643504,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9983464241027832,
|
|
"rewards/confidence_uniqueness_reward": 0.4385930418968201,
|
|
"rewards/format_reward": 0.9995659589767456,
|
|
"rewards/frontier_aurc_reward": -0.0035497402306646107,
|
|
"rewards/frontier_coverage_0": 0.958567988872528,
|
|
"rewards/frontier_coverage_1": 0.958567988872528,
|
|
"rewards/frontier_coverage_10": 0.958567988872528,
|
|
"rewards/frontier_coverage_15": 0.958567988872528,
|
|
"rewards/frontier_coverage_20": 0.958567988872528,
|
|
"rewards/frontier_coverage_25": 0.958567988872528,
|
|
"rewards/frontier_coverage_5": 0.958567988872528,
|
|
"rewards/frontier_ece_reward": 0.00036459730472415686,
|
|
"rewards/frontier_entropy_batch_reward": -0.9456805467605591,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.009702853672206403,
|
|
"signal/advantage_pre_scale_abs_mean": 0.009702853672206403,
|
|
"signal/advantage_pre_scale_std": 0.027841240912675858,
|
|
"signal/advantage_std": 0.027841240912675858,
|
|
"signal/brier_reward/centered_abs_mean": 0.0023789591854438187,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6232638888888888,
|
|
"signal/brier_reward/group_std_mean": 0.005185263650491834,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00023789591796230525,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00023789591796230525,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20213373899459838,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8822916666666668,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2441387802362442,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020213373750448228,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020213373750448228,
|
|
"signal/format_reward/centered_abs_mean": 0.0008409288129769266,
|
|
"signal/format_reward/group_bin_occupancy": 0.1267361111111111,
|
|
"signal/format_reward/group_std_mean": 0.0024552317336201668,
|
|
"signal/format_reward/group_zero_std_frac": 0.9861111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004204644064884633,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004204644064884633,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 2.9672855816897936e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7934027777777779,
|
|
"signal/frontier_aurc_reward/group_std_mean": 4.578574880724773e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.70910709079908e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.70910709079908e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.03771033436059952,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8166666666666667,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.052084099501371384,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.03771033436059952,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8166666666666667,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.052084099501371384,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.03771033436059952,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8166666666666667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.052084099501371384,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.03771033436059952,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8166666666666667,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.052084099501371384,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03771033436059952,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8166666666666667,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.052084099501371384,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.03771033436059952,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8166666666666667,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.052084099501371384,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.03771033436059952,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8166666666666667,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.052084099501371384,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037710336968302728,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0008136974531225861,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4142361111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0020070787984877827,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.136974647641182e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 8.136974647641182e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09818044751882553,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.19889387488365173,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1805555522441864,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009818044863641262,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009818044863641262,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.0669231489987575,
|
|
"calibration/batch_entropy_100bins": 0.4233465784923978,
|
|
"calibration/batch_entropy_10bins": 0.0669231489987575,
|
|
"calibration/batch_entropy_50bins": 0.3287001157455989,
|
|
"calibration/batch_uniqueness": 0.5034230651863096,
|
|
"calibration/buffer_distribution_entropy": 0.7353438977404082,
|
|
"calibration/buffer_entropy_100bins": 0.787261287203169,
|
|
"calibration/buffer_entropy_10bins": 0.7353438977404082,
|
|
"calibration/buffer_entropy_50bins": 0.7757620948845135,
|
|
"calibration/confidence_entropy": 0.0983193265772078,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.02480179368577078,
|
|
"calibration/mean_confidence": 0.024801793685770783,
|
|
"calibration/prompt_uniqueness": 0.4471254579647936,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0009548611111111161,
|
|
"completions/max_length": 2415.0,
|
|
"completions/max_terminated_length": 2415.0,
|
|
"completions/mean_length": 661.5493041992188,
|
|
"completions/mean_terminated_length": 662.1683471679687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 162.4,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 5.204364060773514e-05,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0013,
|
|
"num_tokens": 308993264.0,
|
|
"reward": 1.2135377645492553,
|
|
"reward_std": 0.01914830394089222,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9976012587547303,
|
|
"rewards/confidence_uniqueness_reward": 0.46597900390625,
|
|
"rewards/format_reward": 0.9990451335906982,
|
|
"rewards/frontier_aurc_reward": -0.003718587104231119,
|
|
"rewards/frontier_coverage_0": 0.9543892621994019,
|
|
"rewards/frontier_coverage_1": 0.9543892621994019,
|
|
"rewards/frontier_coverage_10": 0.9543892621994019,
|
|
"rewards/frontier_coverage_15": 0.9543892621994019,
|
|
"rewards/frontier_coverage_20": 0.9543892621994019,
|
|
"rewards/frontier_coverage_25": 0.892327880859375,
|
|
"rewards/frontier_coverage_5": 0.9543892621994019,
|
|
"rewards/frontier_ece_reward": 0.0004319649888202548,
|
|
"rewards/frontier_entropy_batch_reward": -0.942059063911438,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.011962970346212387,
|
|
"signal/advantage_pre_scale_abs_mean": 0.011962970346212387,
|
|
"signal/advantage_pre_scale_std": 0.036182846128940585,
|
|
"signal/advantage_std": 0.036182846128940585,
|
|
"signal/brier_reward/centered_abs_mean": 0.003550727292895317,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6284722222222222,
|
|
"signal/brier_reward/group_std_mean": 0.007100052293390035,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00035507273860275746,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00035507273860275746,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20121129155158995,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8690972222222222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23962823748588563,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020121129229664803,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020121129229664803,
|
|
"signal/format_reward/centered_abs_mean": 0.0017415364738553763,
|
|
"signal/format_reward/group_bin_occupancy": 0.12743055555555557,
|
|
"signal/format_reward/group_std_mean": 0.003971005976200104,
|
|
"signal/format_reward/group_zero_std_frac": 0.9805555462837219,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008707682369276881,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008707682369276881,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 3.7338528636610134e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7989583333333334,
|
|
"signal/frontier_aurc_reward/group_std_mean": 5.7591882068663836e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6673160909449506e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6673160909449506e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.041554590314626695,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8256944444444445,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.05725188627839088,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.041554590314626695,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8256944444444445,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.05725188627839088,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.041554590314626695,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8256944444444445,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.05725188627839088,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.041554590314626695,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8256944444444445,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.05725188627839088,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.041554590314626695,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8256944444444445,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05725188627839088,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04010423347353935,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8260416666666666,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.055193550139665606,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00401042359881103,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00401042359881103,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.041554590314626695,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8256944444444445,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.05725188627839088,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00415545916184783,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0009842410683631897,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4177083333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0023657533805817367,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 9.842410509008914e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 9.842410509008914e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10397121906280518,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.31354166666666666,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.20375558137893676,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.205555559694767,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010397122614085675,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010397122614085675,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.057605848650572555,
|
|
"calibration/batch_entropy_100bins": 0.39059723252365697,
|
|
"calibration/batch_entropy_10bins": 0.057605848650572555,
|
|
"calibration/batch_entropy_50bins": 0.29644429432289326,
|
|
"calibration/batch_uniqueness": 0.4236975737013129,
|
|
"calibration/buffer_distribution_entropy": 0.7161117607960759,
|
|
"calibration/buffer_entropy_100bins": 0.7796128759731576,
|
|
"calibration/buffer_entropy_10bins": 0.7161117607960759,
|
|
"calibration/buffer_entropy_50bins": 0.762393356954156,
|
|
"calibration/confidence_entropy": 0.08499628039622698,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.021287106376922514,
|
|
"calibration/mean_confidence": 0.021287106376922514,
|
|
"calibration/prompt_uniqueness": 0.3726409393426986,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0006944444444444642,
|
|
"completions/max_length": 2865.8,
|
|
"completions/max_terminated_length": 2865.8,
|
|
"completions/mean_length": 712.83681640625,
|
|
"completions/mean_terminated_length": 713.329345703125,
|
|
"completions/min_length": 31.6,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 5.70491720282007e-05,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.0008,
|
|
"num_tokens": 320315480.0,
|
|
"reward": 1.1840367317199707,
|
|
"reward_std": 0.01757926493883133,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9977462530136109,
|
|
"rewards/confidence_uniqueness_reward": 0.4628437697887421,
|
|
"rewards/format_reward": 0.9993055582046508,
|
|
"rewards/frontier_aurc_reward": -0.0038903028704226016,
|
|
"rewards/frontier_coverage_0": 0.9538887143135071,
|
|
"rewards/frontier_coverage_1": 0.9538887143135071,
|
|
"rewards/frontier_coverage_10": 0.9538887143135071,
|
|
"rewards/frontier_coverage_15": 0.9538887143135071,
|
|
"rewards/frontier_coverage_20": 0.8945078253746033,
|
|
"rewards/frontier_coverage_25": 0.6542877435684205,
|
|
"rewards/frontier_coverage_5": 0.9538887143135071,
|
|
"rewards/frontier_ece_reward": 0.0004184367600828409,
|
|
"rewards/frontier_entropy_batch_reward": -0.9349219322204589,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.01071018297225237,
|
|
"signal/advantage_pre_scale_abs_mean": 0.01071018297225237,
|
|
"signal/advantage_pre_scale_std": 0.031720586493611334,
|
|
"signal/advantage_std": 0.031720586493611334,
|
|
"signal/brier_reward/centered_abs_mean": 0.0033094821963459255,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6034722222222222,
|
|
"signal/brier_reward/group_std_mean": 0.006449029874056577,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003309482126496732,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0003309482126496732,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21715166866779329,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8694444444444445,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2559134304523468,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02171516865491867,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02171516865491867,
|
|
"signal/format_reward/centered_abs_mean": 0.0013020833255723118,
|
|
"signal/format_reward/group_bin_occupancy": 0.1267361111111111,
|
|
"signal/format_reward/group_std_mean": 0.00297891478985548,
|
|
"signal/format_reward/group_zero_std_frac": 0.9861111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006510416627861559,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006510416627861559,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 4.027687973575667e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7857638888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 6.0778418992413207e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.034610012444318e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.034610012444318e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.043310850858688354,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.059559579193592074,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.043310850858688354,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.059559579193592074,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.043310850858688354,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.059559579193592074,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.043310850858688354,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.059559579193592074,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04183773845434189,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.057459451258182526,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004183773742988705,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004183773742988705,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.03539830669760704,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.048354345560073855,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035398307256400583,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035398307256400583,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.043310850858688354,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.059559579193592074,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004331085272133351,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0009813750861212612,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4104166666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0022871824447065594,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 9.813751239562407e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 9.813751239562407e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11633445173501969,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3201388888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.22344749867916108,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.138888893276453,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011633445136249065,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011633445136249065,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 1.0,
|
|
"eval_calibration/batch_distribution_entropy": 0.07963192987356056,
|
|
"eval_calibration/batch_entropy_100bins": 0.38521584880230836,
|
|
"eval_calibration/batch_entropy_10bins": 0.07963192987356056,
|
|
"eval_calibration/batch_entropy_50bins": 0.3103724394449236,
|
|
"eval_calibration/batch_uniqueness": 0.4830729166666667,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7010500110603589,
|
|
"eval_calibration/buffer_entropy_100bins": 0.7731452309964988,
|
|
"eval_calibration/buffer_entropy_10bins": 0.7010500110603589,
|
|
"eval_calibration/buffer_entropy_50bins": 0.7519987833032866,
|
|
"eval_calibration/confidence_entropy": 0.10205378013778547,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0,
|
|
"eval_calibration/coverage@15%": 0.0,
|
|
"eval_calibration/coverage@20%": 0.0,
|
|
"eval_calibration/coverage@25%": 0.0,
|
|
"eval_calibration/coverage@30%": 0.0,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.02598170129704982,
|
|
"eval_calibration/mean_confidence": 0.02598170129704982,
|
|
"eval_calibration/prompt_uniqueness": 0.4830729166666667,
|
|
"eval_completions/clipped_ratio": 0.0017361111111111234,
|
|
"eval_completions/max_length": 1706.8333333333333,
|
|
"eval_completions/max_terminated_length": 1706.8333333333333,
|
|
"eval_completions/mean_length": 674.4022725423177,
|
|
"eval_completions/mean_terminated_length": 675.5978190104166,
|
|
"eval_completions/min_length": 171.66666666666666,
|
|
"eval_completions/min_terminated_length": 246.16666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 320315480.0,
|
|
"eval_reward": 1.152519702911377,
|
|
"eval_reward_std": 0.036670129746198654,
|
|
"eval_rewards/accuracy_reward": 0.0,
|
|
"eval_rewards/brier_reward": 0.9963855147361755,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.46905451516310376,
|
|
"eval_rewards/format_reward": 0.9982638955116272,
|
|
"eval_rewards/frontier_aurc_reward": -0.004004960569242637,
|
|
"eval_rewards/frontier_coverage_0": 0.9477129876613617,
|
|
"eval_rewards/frontier_coverage_1": 0.9477129876613617,
|
|
"eval_rewards/frontier_coverage_10": 0.9477129876613617,
|
|
"eval_rewards/frontier_coverage_15": 0.9477129876613617,
|
|
"eval_rewards/frontier_coverage_20": 0.8091484904289246,
|
|
"eval_rewards/frontier_coverage_25": 0.5189993580182394,
|
|
"eval_rewards/frontier_coverage_5": 0.9477129876613617,
|
|
"eval_rewards/frontier_ece_reward": 0.0004888492646083856,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9982638955116272,
|
|
"eval_runtime": 126.377,
|
|
"eval_samples_per_second": 7.913,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/advantage_abs_mean": 0.021798545494675636,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.021798545494675636,
|
|
"eval_signal/advantage_pre_scale_std": 0.04753624647855759,
|
|
"eval_signal/advantage_std": 0.04753624647855759,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.005646458788154026,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.5868055555555556,
|
|
"eval_signal/brier_reward/group_std_mean": 0.013697403056236604,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0005646459176205099,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0005646459176205099,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.21048006663719812,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7777777777777778,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.24474786967039108,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02104800660163164,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02104800660163164,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0033637151742974916,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.13194444444444445,
|
|
"eval_signal/format_reward/group_std_mean": 0.009820927555362383,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.944444457689921,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0016818575871487458,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0016818575871487458,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 5.219127585102493e-05,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7465277777777777,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 9.327459156338591e-05,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.523909803490824e-07,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.523909803490824e-07,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.0491910595446825,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8020833333333335,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.06921165560682614,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0491910595446825,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8020833333333335,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.06921165560682614,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0491910595446825,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8020833333333335,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.06921165560682614,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0491910595446825,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8020833333333335,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.06921165560682614,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.04529993608593941,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.7986111111111112,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.06353887729346752,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004529993748292327,
|
|
"eval_signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004529993748292327,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.03559759445488453,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8055555555555557,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.049026252080996834,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035597594687715173,
|
|
"eval_signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035597594687715173,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0491910595446825,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8020833333333335,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.06921165560682614,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004919106063122551,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0011601041769608855,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.4166666666666666,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.002786213590297848,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00011601041721102472,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00011601041721102472,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0033637151742974916,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.13194444444444445,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.009820927555362383,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.944444457689921,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003363715174297492,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003363715174297492,
|
|
"eval_steps_per_second": 0.047,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.07433150140530483,
|
|
"calibration/batch_entropy_100bins": 0.40955894700259393,
|
|
"calibration/batch_entropy_10bins": 0.07433150140530483,
|
|
"calibration/batch_entropy_50bins": 0.31573255203630846,
|
|
"calibration/batch_uniqueness": 0.4638189879413958,
|
|
"calibration/buffer_distribution_entropy": 0.6885061820464987,
|
|
"calibration/buffer_entropy_100bins": 0.7678003824579658,
|
|
"calibration/buffer_entropy_10bins": 0.6885061820464987,
|
|
"calibration/buffer_entropy_50bins": 0.7438445488804817,
|
|
"calibration/confidence_entropy": 0.09220933377109393,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.02361125712010372,
|
|
"calibration/mean_confidence": 0.023611257120103722,
|
|
"calibration/prompt_uniqueness": 0.41207164032691634,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0006944444444444642,
|
|
"completions/max_length": 2119.8,
|
|
"completions/max_terminated_length": 2119.8,
|
|
"completions/mean_length": 668.18134765625,
|
|
"completions/mean_terminated_length": 668.652392578125,
|
|
"completions/min_length": 80.2,
|
|
"completions/min_terminated_length": 179.8,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.00011576049291761592,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0008,
|
|
"num_tokens": 331120641.0,
|
|
"reward": 1.145693302154541,
|
|
"reward_std": 0.01810295507311821,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9974946975708008,
|
|
"rewards/confidence_uniqueness_reward": 0.47134496569633483,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.004138502664864064,
|
|
"rewards/frontier_coverage_0": 0.9522245168685913,
|
|
"rewards/frontier_coverage_1": 0.9522245168685913,
|
|
"rewards/frontier_coverage_10": 0.9522245168685913,
|
|
"rewards/frontier_coverage_15": 0.9380970597267151,
|
|
"rewards/frontier_coverage_20": 0.7361512064933777,
|
|
"rewards/frontier_coverage_25": 0.44430738091468813,
|
|
"rewards/frontier_coverage_5": 0.9522245168685913,
|
|
"rewards/frontier_ece_reward": 0.0003640947339590639,
|
|
"rewards/frontier_entropy_batch_reward": -0.9353014349937439,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.010529661551117897,
|
|
"signal/advantage_pre_scale_abs_mean": 0.010529661551117897,
|
|
"signal/advantage_pre_scale_std": 0.03242862112820148,
|
|
"signal/advantage_std": 0.03242862112820148,
|
|
"signal/brier_reward/centered_abs_mean": 0.003718013083562255,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6059027777777778,
|
|
"signal/brier_reward/group_std_mean": 0.007659111265093088,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003718013147590682,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0003718013147590682,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2201870173215866,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2593624711036682,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02201870158314705,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02201870158314705,
|
|
"signal/format_reward/centered_abs_mean": 0.001491970452480018,
|
|
"signal/format_reward/group_bin_occupancy": 0.12743055555555555,
|
|
"signal/format_reward/group_std_mean": 0.0038215355947613717,
|
|
"signal/format_reward/group_zero_std_frac": 0.9805555462837219,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000745985226240009,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000745985226240009,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 4.6402324369410056e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7687499999999999,
|
|
"signal/frontier_aurc_reward/group_std_mean": 7.237604731926694e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.800290864499402e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.800290864499402e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.045414050668478013,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.06280554011464119,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004541405383497477,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004541405383497477,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.045414050668478013,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.06280554011464119,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004541405383497477,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004541405383497477,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.045414050668478013,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06280554011464119,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004541405383497477,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004541405383497477,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04506048187613487,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8083333333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.062302114069461824,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004506048187613488,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004506048187613488,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03970897793769836,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8104166666666668,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05463676452636719,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003970897663384676,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003970897663384676,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.030375007912516593,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8152777777777779,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04137557670474053,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003037500847131014,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003037500847131014,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.045414050668478013,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.06280554011464119,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004541405383497477,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004541405383497477,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0009263153071515263,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.44548611111111114,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0021566389128565787,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 9.263153333449736e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 9.263153333449736e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11519579887390137,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.33472222222222225,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.21977143883705139,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.14166666865348815,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011519579775631427,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011519579775631427,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.09049986613417273,
|
|
"calibration/batch_entropy_100bins": 0.41836594311176223,
|
|
"calibration/batch_entropy_10bins": 0.09049986613417273,
|
|
"calibration/batch_entropy_50bins": 0.3264790681345277,
|
|
"calibration/batch_uniqueness": 0.47830015613269194,
|
|
"calibration/buffer_distribution_entropy": 0.6578983569779517,
|
|
"calibration/buffer_entropy_100bins": 0.7546975009480232,
|
|
"calibration/buffer_entropy_10bins": 0.6578983569779517,
|
|
"calibration/buffer_entropy_50bins": 0.7244401883194689,
|
|
"calibration/confidence_entropy": 0.09609417117910231,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.024954540242279003,
|
|
"calibration/mean_confidence": 0.024954540242279007,
|
|
"calibration/prompt_uniqueness": 0.4211417824074074,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0007812500000000222,
|
|
"completions/max_length": 2455.2,
|
|
"completions/max_terminated_length": 2455.2,
|
|
"completions/mean_length": 671.3717895507813,
|
|
"completions/mean_terminated_length": 671.8844848632813,
|
|
"completions/min_length": 29.4,
|
|
"completions/min_terminated_length": 172.6,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 5.154682366992347e-05,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 341942140.0,
|
|
"reward": 1.1252743005752563,
|
|
"reward_std": 0.01854655463248491,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9972919225692749,
|
|
"rewards/confidence_uniqueness_reward": 0.47839406728744505,
|
|
"rewards/format_reward": 0.9991319298744201,
|
|
"rewards/frontier_aurc_reward": -0.004414523858577013,
|
|
"rewards/frontier_coverage_0": 0.9507062673568726,
|
|
"rewards/frontier_coverage_1": 0.9507062673568726,
|
|
"rewards/frontier_coverage_10": 0.9507062673568726,
|
|
"rewards/frontier_coverage_15": 0.9317117929458618,
|
|
"rewards/frontier_coverage_20": 0.6626289486885071,
|
|
"rewards/frontier_coverage_25": 0.3123237192630768,
|
|
"rewards/frontier_coverage_5": 0.9507062673568726,
|
|
"rewards/frontier_ece_reward": 0.0003186647722031921,
|
|
"rewards/frontier_entropy_batch_reward": -0.9278595924377442,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.010827501863241195,
|
|
"signal/advantage_pre_scale_abs_mean": 0.010827501863241195,
|
|
"signal/advantage_pre_scale_std": 0.03373810928314924,
|
|
"signal/advantage_std": 0.03373810928314924,
|
|
"signal/brier_reward/centered_abs_mean": 0.004014838207513094,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5802083333333334,
|
|
"signal/brier_reward/group_std_mean": 0.0078026833012700084,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00040148382540792225,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00040148382540792225,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21236040890216829,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8649305555555558,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.25193352103233335,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021236040443181992,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021236040443181992,
|
|
"signal/format_reward/centered_abs_mean": 0.0016059027751907707,
|
|
"signal/format_reward/group_bin_occupancy": 0.12708333333333333,
|
|
"signal/format_reward/group_std_mean": 0.0035807004664093257,
|
|
"signal/format_reward/group_zero_std_frac": 0.9833333253860473,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008029513875953854,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008029513875953854,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 5.2095612045377496e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7486111111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 8.059373503783717e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.511951596621657e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.511951596621657e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.046503888070583345,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.795138888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.06489058881998062,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004650388844311237,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004650388844311237,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.046503888070583345,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.795138888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.06489058881998062,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004650388844311237,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004650388844311237,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.046503888070583345,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.795138888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06489058881998062,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004650388844311237,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004650388844311237,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04594070836901665,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.795138888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06408572494983673,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004594070836901665,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004594070836901665,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03836109861731529,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7961805555555557,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05321277379989624,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038361097220331432,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038361097220331432,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.025570582970976828,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.03507376089692116,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002557058446109295,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002557058446109295,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.046503888070583345,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.795138888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.06489058881998062,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004650388844311237,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004650388844311237,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0008886751136742532,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.44722222222222224,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.002127536362968385,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.886751311365515e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 8.886751311365515e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12815287709236145,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3399305555555555,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23999074399471282,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11388888880610466,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01281528789550066,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01281528789550066,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.11315508988168552,
|
|
"calibration/batch_entropy_100bins": 0.4314929978888915,
|
|
"calibration/batch_entropy_10bins": 0.11315508988168552,
|
|
"calibration/batch_entropy_50bins": 0.3423038694920577,
|
|
"calibration/batch_uniqueness": 0.48717437446107875,
|
|
"calibration/buffer_distribution_entropy": 0.6237706258889,
|
|
"calibration/buffer_entropy_100bins": 0.7391337749032101,
|
|
"calibration/buffer_entropy_10bins": 0.6237706258889,
|
|
"calibration/buffer_entropy_50bins": 0.7026557564520889,
|
|
"calibration/confidence_entropy": 0.10202881321508014,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.02723046109240631,
|
|
"calibration/mean_confidence": 0.027230461092406316,
|
|
"calibration/prompt_uniqueness": 0.4402623542100243,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0003472222222222321,
|
|
"completions/max_length": 2025.0,
|
|
"completions/max_terminated_length": 2025.0,
|
|
"completions/mean_length": 663.9360229492188,
|
|
"completions/mean_terminated_length": 664.1629760742187,
|
|
"completions/min_length": 66.6,
|
|
"completions/min_terminated_length": 171.6,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 7.190422184066847e-05,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 352729755.0,
|
|
"reward": 1.1091915130615235,
|
|
"reward_std": 0.017433946020901203,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9974352121353149,
|
|
"rewards/confidence_uniqueness_reward": 0.5001117169857026,
|
|
"rewards/format_reward": 0.9995659708976745,
|
|
"rewards/frontier_aurc_reward": -0.004717729333788156,
|
|
"rewards/frontier_coverage_0": 0.9473315596580505,
|
|
"rewards/frontier_coverage_1": 0.9473315596580505,
|
|
"rewards/frontier_coverage_10": 0.9473315596580505,
|
|
"rewards/frontier_coverage_15": 0.9283288359642029,
|
|
"rewards/frontier_coverage_20": 0.5928752660751343,
|
|
"rewards/frontier_coverage_25": 0.20689672827720643,
|
|
"rewards/frontier_coverage_5": 0.9473315596580505,
|
|
"rewards/frontier_ece_reward": 0.00027219949988648294,
|
|
"rewards/frontier_entropy_batch_reward": -0.9205714225769043,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.010360554978251457,
|
|
"signal/advantage_pre_scale_abs_mean": 0.010360554978251457,
|
|
"signal/advantage_pre_scale_std": 0.0259440615773201,
|
|
"signal/advantage_std": 0.0259440615773201,
|
|
"signal/brier_reward/centered_abs_mean": 0.0036302336025983094,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5999999999999999,
|
|
"signal/brier_reward/group_std_mean": 0.0071899168193340305,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00036302337539382277,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00036302337539382277,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21097120344638826,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8465277777777777,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.24810958802700042,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021097120270133017,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021097120270133017,
|
|
"signal/format_reward/centered_abs_mean": 0.0008409288013353944,
|
|
"signal/format_reward/group_bin_occupancy": 0.1267361111111111,
|
|
"signal/format_reward/group_std_mean": 0.0024552317336201668,
|
|
"signal/format_reward/group_zero_std_frac": 0.9861111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004204644006676972,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004204644006676972,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 5.836021373397671e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7534722222222222,
|
|
"signal/frontier_aurc_reward/group_std_mean": 8.929047762649134e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.295026534848148e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.295026534848148e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.0495891772210598,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8024305555555555,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.06838846206665039,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00495891785249114,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00495891785249114,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0495891772210598,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8024305555555555,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.06838846206665039,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00495891785249114,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00495891785249114,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0495891772210598,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8024305555555555,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06838846206665039,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00495891785249114,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00495891785249114,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04905526265501976,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8024305555555555,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06764949411153794,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00490552643314004,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00490552643314004,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03878045603632927,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05315817221999168,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038780457805842163,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038780457805842163,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.021978146955370904,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.820138888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.029635490104556083,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002197814825922251,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002197814825922251,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0495891772210598,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8024305555555555,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.06838846206665039,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00495891785249114,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00495891785249114,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0008334407932125032,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4770833333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0019264052622020244,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.334408194059506e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 8.334408194059506e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1398734837770462,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3708333333333333,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2520256280899048,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06944444626569748,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013987349346280098,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013987349346280098,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.12664223510339462,
|
|
"calibration/batch_entropy_100bins": 0.4533190068324675,
|
|
"calibration/batch_entropy_10bins": 0.12664223510339462,
|
|
"calibration/batch_entropy_50bins": 0.36405689826353826,
|
|
"calibration/batch_uniqueness": 0.5366184798988781,
|
|
"calibration/buffer_distribution_entropy": 0.5884116370549787,
|
|
"calibration/buffer_entropy_100bins": 0.7212360190144044,
|
|
"calibration/buffer_entropy_10bins": 0.5884116370549787,
|
|
"calibration/buffer_entropy_50bins": 0.6790585356261435,
|
|
"calibration/confidence_entropy": 0.1112857625829545,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.03005219126552771,
|
|
"calibration/mean_confidence": 0.03005219126552771,
|
|
"calibration/prompt_uniqueness": 0.48039785879629626,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0006944444444444642,
|
|
"completions/max_length": 2424.8,
|
|
"completions/max_terminated_length": 2424.8,
|
|
"completions/mean_length": 664.7514892578125,
|
|
"completions/mean_terminated_length": 665.209765625,
|
|
"completions/min_length": 58.2,
|
|
"completions/min_terminated_length": 159.8,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 5.9971396694891155e-05,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0008,
|
|
"num_tokens": 363476876.0,
|
|
"reward": 1.0961509227752686,
|
|
"reward_std": 0.02001706659793854,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9968130946159363,
|
|
"rewards/confidence_uniqueness_reward": 0.5170513212680816,
|
|
"rewards/format_reward": 0.9991319417953491,
|
|
"rewards/frontier_aurc_reward": -0.005049161147326231,
|
|
"rewards/frontier_coverage_0": 0.9443602681159973,
|
|
"rewards/frontier_coverage_1": 0.9443602681159973,
|
|
"rewards/frontier_coverage_10": 0.9443602681159973,
|
|
"rewards/frontier_coverage_15": 0.9223458051681519,
|
|
"rewards/frontier_coverage_20": 0.5228747487068176,
|
|
"rewards/frontier_coverage_25": 0.149213108420372,
|
|
"rewards/frontier_coverage_5": 0.9443602681159973,
|
|
"rewards/frontier_ece_reward": 0.0002292450924869627,
|
|
"rewards/frontier_entropy_batch_reward": -0.9194879293441772,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.01188025027513504,
|
|
"signal/advantage_pre_scale_abs_mean": 0.01188025027513504,
|
|
"signal/advantage_pre_scale_std": 0.033895105868577954,
|
|
"signal/advantage_std": 0.033895105868577954,
|
|
"signal/brier_reward/centered_abs_mean": 0.004660056484863162,
|
|
"signal/brier_reward/group_bin_occupancy": 0.603125,
|
|
"signal/brier_reward/group_std_mean": 0.009344376530498267,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0004660056554712355,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0004660056554712355,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20060275495052338,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8604166666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2357197880744934,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020060275867581367,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020060275867581367,
|
|
"signal/format_reward/centered_abs_mean": 0.0016601562150754035,
|
|
"signal/format_reward/group_bin_occupancy": 0.1277777777777778,
|
|
"signal/format_reward/group_std_mean": 0.004312581941485405,
|
|
"signal/format_reward/group_zero_std_frac": 0.9777777671813965,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008300781075377018,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008300781075377018,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 7.052028959151358e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7475694444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00011104991572210565,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.815036608211813e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.815036608211813e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.05183984935283661,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8131944444444444,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.0717699870467186,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0051839851774275305,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0051839851774275305,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05183984935283661,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8131944444444444,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0717699870467186,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0051839851774275305,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0051839851774275305,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.05183984935283661,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8131944444444444,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0717699870467186,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0051839851774275305,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0051839851774275305,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.05118511840701103,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8131944444444444,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07084002643823624,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0051185118034482,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0051185118034482,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03787098824977875,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8190972222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.05193596109747887,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037870988249778747,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037870988249778747,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.019114065170288085,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8402777777777777,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.025573540851473807,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019114065449684857,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019114065449684857,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05183984935283661,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8131944444444444,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0717699870467186,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0051839851774275305,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0051839851774275305,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0008435256313532591,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5055555555555555,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.001923717954196036,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.435256313532591e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 8.435256313532591e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14058307111263274,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.37777777777777777,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.249654358625412,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07500000223517418,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014058307558298112,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014058307558298112,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.11065710270652036,
|
|
"calibration/batch_entropy_100bins": 0.4242565693032717,
|
|
"calibration/batch_entropy_10bins": 0.11065710270652036,
|
|
"calibration/batch_entropy_50bins": 0.3340393237755341,
|
|
"calibration/batch_uniqueness": 0.4780490451388889,
|
|
"calibration/buffer_distribution_entropy": 0.5503343485727277,
|
|
"calibration/buffer_entropy_100bins": 0.699339396111367,
|
|
"calibration/buffer_entropy_10bins": 0.5503343485727277,
|
|
"calibration/buffer_entropy_50bins": 0.6523164013443059,
|
|
"calibration/confidence_entropy": 0.09850787980656697,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.02609543780615011,
|
|
"calibration/mean_confidence": 0.02609543780615011,
|
|
"calibration/prompt_uniqueness": 0.4217447916666666,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0008680555555555802,
|
|
"completions/max_length": 2943.6,
|
|
"completions/max_terminated_length": 2943.6,
|
|
"completions/mean_length": 690.4206787109375,
|
|
"completions/mean_terminated_length": 691.0146606445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 6.962921179365367e-05,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.0011,
|
|
"num_tokens": 374538490.0,
|
|
"reward": 1.0829426765441894,
|
|
"reward_std": 0.019469749182462692,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9969998836517334,
|
|
"rewards/confidence_uniqueness_reward": 0.48848451375961305,
|
|
"rewards/format_reward": 0.9991319417953491,
|
|
"rewards/frontier_aurc_reward": -0.005386561527848244,
|
|
"rewards/frontier_coverage_0": 0.9478386521339417,
|
|
"rewards/frontier_coverage_1": 0.9478386521339417,
|
|
"rewards/frontier_coverage_10": 0.9452585101127624,
|
|
"rewards/frontier_coverage_15": 0.9100270628929138,
|
|
"rewards/frontier_coverage_20": 0.45304937958717345,
|
|
"rewards/frontier_coverage_25": 0.11471473574638366,
|
|
"rewards/frontier_coverage_5": 0.9478386521339417,
|
|
"rewards/frontier_ece_reward": 7.881744513724698e-05,
|
|
"rewards/frontier_entropy_batch_reward": -0.9176892876625061,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.01158843245357275,
|
|
"signal/advantage_pre_scale_abs_mean": 0.01158843245357275,
|
|
"signal/advantage_pre_scale_std": 0.034636962413787845,
|
|
"signal/advantage_std": 0.034636962413787845,
|
|
"signal/brier_reward/centered_abs_mean": 0.00444792709313333,
|
|
"signal/brier_reward/group_bin_occupancy": 0.60625,
|
|
"signal/brier_reward/group_std_mean": 0.0084507011808455,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00044479272910393777,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00044479272910393777,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22068254351615907,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8458333333333332,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2589582115411758,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02206825464963913,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02206825464963913,
|
|
"signal/format_reward/centered_abs_mean": 0.0016276041511446237,
|
|
"signal/format_reward/group_bin_occupancy": 0.12708333333333333,
|
|
"signal/format_reward/group_std_mean": 0.003662066673859954,
|
|
"signal/format_reward/group_zero_std_frac": 0.9833333253860473,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008138020755723118,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008138020755723118,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 7.466641545761377e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00011583235755097121,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.333302159575397e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.333302159575397e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.05025492906570435,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.06967095285654068,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005025493167340756,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005025493167340756,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05025492906570435,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.06967095285654068,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005025493167340756,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005025493167340756,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.050183454900979994,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06957284063100815,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005018345545977354,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005018345545977354,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04914246648550034,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8079861111111113,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06813211217522622,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004914247151464224,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004914247151464224,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.033781594783067706,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.814236111111111,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04629666060209274,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033781595062464475,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033781595062464475,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.01581826526671648,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8413194444444445,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.021130923926830292,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00158182664308697,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00158182664308697,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05025492906570435,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8079861111111111,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.06967095285654068,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005025493167340756,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005025493167340756,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0006432678666897119,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5315972222222223,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0014886658871546387,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 6.43267878331244e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 6.43267878331244e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14317719340324403,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.37430555555555556,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2517729789018631,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07222222350537777,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014317720010876656,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014317720010876656,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.12243381522418999,
|
|
"calibration/batch_entropy_100bins": 0.43948610026723023,
|
|
"calibration/batch_entropy_10bins": 0.12243381522418999,
|
|
"calibration/batch_entropy_50bins": 0.3486525593485308,
|
|
"calibration/batch_uniqueness": 0.5136855712749895,
|
|
"calibration/buffer_distribution_entropy": 0.5074215023331706,
|
|
"calibration/buffer_entropy_100bins": 0.6735423239876466,
|
|
"calibration/buffer_entropy_10bins": 0.5074215023331706,
|
|
"calibration/buffer_entropy_50bins": 0.6219486115552227,
|
|
"calibration/confidence_entropy": 0.10537724605773757,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.028224827259382624,
|
|
"calibration/mean_confidence": 0.028224827259382624,
|
|
"calibration/prompt_uniqueness": 0.4567347923170309,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0013020833333333482,
|
|
"completions/max_length": 2687.4,
|
|
"completions/max_terminated_length": 2687.4,
|
|
"completions/mean_length": 673.0866333007813,
|
|
"completions/mean_terminated_length": 673.9573852539063,
|
|
"completions/min_length": 33.6,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 9.977127774618566e-05,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.0018,
|
|
"num_tokens": 385392416.0,
|
|
"reward": 1.0712470054626464,
|
|
"reward_std": 0.021333076059818268,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9964566230773926,
|
|
"rewards/confidence_uniqueness_reward": 0.494760000705719,
|
|
"rewards/format_reward": 0.9986979246139527,
|
|
"rewards/frontier_aurc_reward": -0.005718124844133854,
|
|
"rewards/frontier_coverage_0": 0.9464028477668762,
|
|
"rewards/frontier_coverage_1": 0.9464028477668762,
|
|
"rewards/frontier_coverage_10": 0.941408348083496,
|
|
"rewards/frontier_coverage_15": 0.8598459959030151,
|
|
"rewards/frontier_coverage_20": 0.40984349250793456,
|
|
"rewards/frontier_coverage_25": 0.090379236638546,
|
|
"rewards/frontier_coverage_5": 0.9464028477668762,
|
|
"rewards/frontier_ece_reward": -0.00010452797578182071,
|
|
"rewards/frontier_entropy_batch_reward": -0.9121028780937195,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.012168660387396812,
|
|
"signal/advantage_pre_scale_abs_mean": 0.012168660387396812,
|
|
"signal/advantage_pre_scale_std": 0.03872519172728062,
|
|
"signal/advantage_std": 0.03872519172728062,
|
|
"signal/brier_reward/centered_abs_mean": 0.005366379115730524,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5815972222222221,
|
|
"signal/brier_reward/group_std_mean": 0.010433847829699517,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0005366379395127296,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0005366379395127296,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21540333926677704,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.851736111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.25403536260128023,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021540333330631257,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021540333330631257,
|
|
"signal/format_reward/centered_abs_mean": 0.002392578055150807,
|
|
"signal/format_reward/group_bin_occupancy": 0.128125,
|
|
"signal/format_reward/group_std_mean": 0.005337309651076793,
|
|
"signal/format_reward/group_zero_std_frac": 0.9749999880790711,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0011962890275754035,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0011962890275754035,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 8.230793027905748e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7076388888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00013148134166840464,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.0288491466781125e-06,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.0288491466781125e-06,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.051775246113538745,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7840277777777779,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.07248672246932983,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005177524592727423,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005177524592727423,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.051775246113538745,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7840277777777779,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07248672246932983,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005177524592727423,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005177524592727423,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.05163362696766853,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7840277777777779,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0722737193107605,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005163362808525562,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005163362808525562,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.049173231422901156,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7840277777777779,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06863442212343215,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004917323403060436,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004917323403060436,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03243098296225071,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7975694444444444,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04470113664865494,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032430985011160374,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032430985011160374,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.014044274762272834,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8270833333333334,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.018727122619748116,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014044275041669608,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014044275041669608,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.051775246113538745,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7840277777777779,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07248672246932983,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005177524592727423,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005177524592727423,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0005371888051740825,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5788194444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0011012869304977357,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 5.3718879644293335e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 5.3718879644293335e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15258081257343292,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.38159722222222225,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2634937405586243,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.050000001676380634,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015258081257343292,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015258081257343292,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.1397452059158112,
|
|
"calibration/batch_entropy_100bins": 0.4532488626623232,
|
|
"calibration/batch_entropy_10bins": 0.1397452059158112,
|
|
"calibration/batch_entropy_50bins": 0.36456030835742337,
|
|
"calibration/batch_uniqueness": 0.5317420231340483,
|
|
"calibration/buffer_distribution_entropy": 0.46346997718826266,
|
|
"calibration/buffer_entropy_100bins": 0.6473612509534107,
|
|
"calibration/buffer_entropy_10bins": 0.46346997718826266,
|
|
"calibration/buffer_entropy_50bins": 0.5911614288645997,
|
|
"calibration/confidence_entropy": 0.11180096035805913,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.03057175988552433,
|
|
"calibration/mean_confidence": 0.03057175988552433,
|
|
"calibration/prompt_uniqueness": 0.4787019949921956,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00017361111111111605,
|
|
"completions/max_length": 2614.6,
|
|
"completions/max_terminated_length": 2614.6,
|
|
"completions/mean_length": 650.3674438476562,
|
|
"completions/mean_terminated_length": 650.4793334960938,
|
|
"completions/min_length": 98.0,
|
|
"completions/min_terminated_length": 166.6,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.0001353075058432296,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.0003,
|
|
"num_tokens": 395974697.0,
|
|
"reward": 1.0496395826339722,
|
|
"reward_std": 0.01763119362294674,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.997331178188324,
|
|
"rewards/confidence_uniqueness_reward": 0.5169573307037354,
|
|
"rewards/format_reward": 0.9997395753860474,
|
|
"rewards/frontier_aurc_reward": -0.006073478236794471,
|
|
"rewards/frontier_coverage_0": 0.9439624428749085,
|
|
"rewards/frontier_coverage_1": 0.9439624428749085,
|
|
"rewards/frontier_coverage_10": 0.9314130187034607,
|
|
"rewards/frontier_coverage_15": 0.7630311965942382,
|
|
"rewards/frontier_coverage_20": 0.31332806348800657,
|
|
"rewards/frontier_coverage_25": 0.06819383800029755,
|
|
"rewards/frontier_coverage_5": 0.9432536959648132,
|
|
"rewards/frontier_ece_reward": -0.00026074662746395917,
|
|
"rewards/frontier_entropy_batch_reward": -0.9227155327796936,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.010541598871350288,
|
|
"signal/advantage_pre_scale_abs_mean": 0.010541598871350288,
|
|
"signal/advantage_pre_scale_std": 0.02300250492990017,
|
|
"signal/advantage_std": 0.02300250492990017,
|
|
"signal/brier_reward/centered_abs_mean": 0.0036982921417802574,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6208333333333333,
|
|
"signal/brier_reward/group_std_mean": 0.006745168566703796,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00036982920719310643,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00036982920719310643,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20829733610153198,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8489583333333333,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.24315473139286042,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020829734578728675,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020829734578728675,
|
|
"signal/format_reward/centered_abs_mean": 0.0005045572877861559,
|
|
"signal/format_reward/group_bin_occupancy": 0.12604166666666666,
|
|
"signal/format_reward/group_std_mean": 0.0014731390401721,
|
|
"signal/format_reward/group_zero_std_frac": 0.9916666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00025227864389307795,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00025227864389307795,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 7.683526928303763e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7350694444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00011488014133647085,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.60440888775338e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.60440888775338e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.05254817381501198,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.815625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.0715567022562027,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005254817847162485,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005254817847162485,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05254817381501198,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.815625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0715567022562027,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005254817847162485,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005254817847162485,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.052186784148216245,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8152777777777779,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07105031162500382,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005218678433448076,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005218678433448076,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04690430983901024,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8173611111111111,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06369466707110405,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004690431244671345,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004690431244671345,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.02917616181075573,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8243055555555555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.03923035711050034,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029176161624491215,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029176161624491215,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.012513933330774307,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8538194444444442,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.016343067213892937,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001251393323764205,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001251393323764205,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05252725183963776,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.815625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07152666449546814,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005252725258469581,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005252725258469581,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0006273603008594364,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6163194444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0012211131630465387,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 6.273603576119058e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 6.273603576119058e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13514876663684844,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.36111111111111105,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24130152761936188,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10277777835726738,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013514876924455166,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013514876924455166,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.14159358832858687,
|
|
"calibration/batch_entropy_100bins": 0.457122083997195,
|
|
"calibration/batch_entropy_10bins": 0.14159358832858687,
|
|
"calibration/batch_entropy_50bins": 0.36937610428144063,
|
|
"calibration/batch_uniqueness": 0.5414725827467513,
|
|
"calibration/buffer_distribution_entropy": 0.4170079353065802,
|
|
"calibration/buffer_entropy_100bins": 0.6200426436493253,
|
|
"calibration/buffer_entropy_10bins": 0.4170079353065802,
|
|
"calibration/buffer_entropy_50bins": 0.5589740745093812,
|
|
"calibration/confidence_entropy": 0.11398051065385324,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.03120483008441626,
|
|
"calibration/mean_confidence": 0.031204830084416258,
|
|
"calibration/prompt_uniqueness": 0.49466172931841834,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0006076388888889061,
|
|
"completions/max_length": 2246.0,
|
|
"completions/max_terminated_length": 2246.0,
|
|
"completions/mean_length": 668.8586181640625,
|
|
"completions/mean_terminated_length": 669.2669555664063,
|
|
"completions/min_length": 28.2,
|
|
"completions/min_terminated_length": 172.8,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.00011211562377866358,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0006,
|
|
"num_tokens": 406762892.0,
|
|
"reward": 1.0342876434326171,
|
|
"reward_std": 0.020416828989982604,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9967251300811768,
|
|
"rewards/confidence_uniqueness_reward": 0.5348598599433899,
|
|
"rewards/format_reward": 0.9993055462837219,
|
|
"rewards/frontier_aurc_reward": -0.0064199940301477906,
|
|
"rewards/frontier_coverage_0": 0.9401438474655152,
|
|
"rewards/frontier_coverage_1": 0.9401438474655152,
|
|
"rewards/frontier_coverage_10": 0.9241740465164184,
|
|
"rewards/frontier_coverage_15": 0.6645206689834595,
|
|
"rewards/frontier_coverage_20": 0.2714716076850891,
|
|
"rewards/frontier_coverage_25": 0.05851527601480484,
|
|
"rewards/frontier_coverage_5": 0.9390028834342956,
|
|
"rewards/frontier_ece_reward": -0.0005113947670906782,
|
|
"rewards/frontier_entropy_batch_reward": -0.9218945980072022,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.011686141788959502,
|
|
"signal/advantage_pre_scale_abs_mean": 0.011686141788959502,
|
|
"signal/advantage_pre_scale_std": 0.031211203336715697,
|
|
"signal/advantage_std": 0.031211203336715697,
|
|
"signal/brier_reward/centered_abs_mean": 0.004695464763790369,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6270833333333333,
|
|
"signal/brier_reward/group_std_mean": 0.009423768334090709,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00046954649733379485,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00046954649733379485,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20274572670459748,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23646896481513976,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020274572446942328,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020274572446942328,
|
|
"signal/format_reward/centered_abs_mean": 0.0013454860891215503,
|
|
"signal/format_reward/group_bin_occupancy": 0.12777777777777777,
|
|
"signal/format_reward/group_std_mean": 0.003928370773792267,
|
|
"signal/format_reward/group_zero_std_frac": 0.9777777671813965,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006727430445607752,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006727430445607752,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 8.048932650126517e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7298611111111112,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00012802162964362652,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.006116576718341e-06,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.006116576718341e-06,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.05514864847064018,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8149305555555557,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.0753313958644867,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0055148651823401455,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0055148651823401455,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05514864847064018,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8149305555555557,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0753313958644867,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0055148651823401455,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0055148651823401455,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.05464940145611763,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8149305555555557,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07461710721254348,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005464939959347248,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005464939959347248,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.045646652579307556,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8190972222222224,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06189981997013092,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004564665257930756,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004564665257930756,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.02797577567398548,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8340277777777778,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.03734948411583901,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002797577390447259,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002797577390447259,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.011748875863850117,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8729166666666668,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.015189211443066597,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011748875956982374,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011748875956982374,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.055112923681735995,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8149305555555557,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07528131753206253,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0055112925358116625,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0055112925358116625,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0008357930113561451,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6513888888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0014408526243641973,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 8.357930200872943e-05,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 8.357930200872943e-05,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13618087768554688,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.37881944444444443,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24314994513988494,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07500000298023224,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013618088141083718,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013618088141083718,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.13027754640944603,
|
|
"calibration/batch_entropy_100bins": 0.4532988871809781,
|
|
"calibration/batch_entropy_10bins": 0.13027754640944603,
|
|
"calibration/batch_entropy_50bins": 0.36252827329939896,
|
|
"calibration/batch_uniqueness": 0.5393975639469387,
|
|
"calibration/buffer_distribution_entropy": 0.3707754570940318,
|
|
"calibration/buffer_entropy_100bins": 0.5929112185718033,
|
|
"calibration/buffer_entropy_10bins": 0.3707754570940318,
|
|
"calibration/buffer_entropy_50bins": 0.5270096794218817,
|
|
"calibration/confidence_entropy": 0.11147583357801931,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.02990175815167439,
|
|
"calibration/mean_confidence": 0.02990175815167439,
|
|
"calibration/prompt_uniqueness": 0.49622036778095735,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0004340277777777901,
|
|
"completions/max_length": 2352.6,
|
|
"completions/max_terminated_length": 2352.6,
|
|
"completions/mean_length": 684.2117309570312,
|
|
"completions/mean_terminated_length": 684.5085205078125,
|
|
"completions/min_length": 31.6,
|
|
"completions/min_terminated_length": 161.6,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.00023885858536232263,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.0005,
|
|
"num_tokens": 417725875.0,
|
|
"reward": 1.0253089666366577,
|
|
"reward_std": 0.019479617476463318,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9968544721603394,
|
|
"rewards/confidence_uniqueness_reward": 0.5347052574157715,
|
|
"rewards/format_reward": 0.9993923544883728,
|
|
"rewards/frontier_aurc_reward": -0.006775370147079229,
|
|
"rewards/frontier_coverage_0": 0.9401466131210328,
|
|
"rewards/frontier_coverage_1": 0.9401466131210328,
|
|
"rewards/frontier_coverage_10": 0.9198176503181458,
|
|
"rewards/frontier_coverage_15": 0.6201009631156922,
|
|
"rewards/frontier_coverage_20": 0.2388811856508255,
|
|
"rewards/frontier_coverage_25": 0.05078308284282684,
|
|
"rewards/frontier_coverage_5": 0.9393264293670655,
|
|
"rewards/frontier_ece_reward": -0.0007388276979327201,
|
|
"rewards/frontier_entropy_batch_reward": -0.9230483531951904,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.011419064365327358,
|
|
"signal/advantage_pre_scale_abs_mean": 0.011419064365327358,
|
|
"signal/advantage_pre_scale_std": 0.029631392285227775,
|
|
"signal/advantage_std": 0.029631392285227775,
|
|
"signal/brier_reward/centered_abs_mean": 0.004466315684840083,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6125,
|
|
"signal/brier_reward/group_std_mean": 0.008843818213790655,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00044663152657449247,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.00044663152657449247,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20187841057777406,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2346838593482971,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02018784135580063,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02018784135580063,
|
|
"signal/format_reward/centered_abs_mean": 0.001177300326526165,
|
|
"signal/format_reward/group_bin_occupancy": 0.12743055555555557,
|
|
"signal/format_reward/group_std_mean": 0.0034373244270682335,
|
|
"signal/format_reward/group_zero_std_frac": 0.9805555462837219,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005886501632630825,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0005886501632630825,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 7.506578403990716e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7173611111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0001206688757520169,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.383222845826821e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.383222845826821e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.054283497482538225,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8135416666666666,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.07416067421436309,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005428350064903498,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005428350064903498,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.054283497482538225,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8135416666666666,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07416067421436309,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005428350064903498,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005428350064903498,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.053661100566387177,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8135416666666666,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07328308522701263,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00536611033603549,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00536611033603549,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04330161884427071,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8215277777777779,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.05878105312585831,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004330162052065134,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004330162052065134,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.025675978884100913,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.03427637964487076,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025675980374217033,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025675980374217033,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.010694251023232937,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8760416666666666,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.013758376985788346,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010694251395761967,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010694251395761967,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05425994023680687,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8135416666666666,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07412810325622558,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005425994377583265,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005425994377583265,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.001065828336868435,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6572916666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0017601919127628208,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0001065828386344947,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0001065828386344947,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13488883078098296,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.371875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2434331715106964,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06388888973742723,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013488883711397648,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013488883711397648,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.13851863335516348,
|
|
"calibration/batch_entropy_100bins": 0.4530965748328907,
|
|
"calibration/batch_entropy_10bins": 0.13851863335516348,
|
|
"calibration/batch_entropy_50bins": 0.36334488782311547,
|
|
"calibration/batch_uniqueness": 0.5406433485539678,
|
|
"calibration/buffer_distribution_entropy": 0.32474393930528545,
|
|
"calibration/buffer_entropy_100bins": 0.5656790315407927,
|
|
"calibration/buffer_entropy_10bins": 0.32474393930528545,
|
|
"calibration/buffer_entropy_50bins": 0.49501215415121924,
|
|
"calibration/confidence_entropy": 0.11259193466071331,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.03032343621645415,
|
|
"calibration/mean_confidence": 0.030323436216454152,
|
|
"calibration/prompt_uniqueness": 0.49584634061741245,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001041666666666674,
|
|
"completions/max_length": 3055.6,
|
|
"completions/max_terminated_length": 3055.6,
|
|
"completions/mean_length": 653.0716186523438,
|
|
"completions/mean_terminated_length": 653.7612670898437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 177.2,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.00013980362564325333,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0011,
|
|
"num_tokens": 428317068.0,
|
|
"reward": 1.0170727252960206,
|
|
"reward_std": 0.021385809779167174,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9963622570037842,
|
|
"rewards/confidence_uniqueness_reward": 0.5438861727714539,
|
|
"rewards/format_reward": 0.9989583253860473,
|
|
"rewards/frontier_aurc_reward": -0.007121744379401207,
|
|
"rewards/frontier_coverage_0": 0.9382775664329529,
|
|
"rewards/frontier_coverage_1": 0.9382775664329529,
|
|
"rewards/frontier_coverage_10": 0.910933256149292,
|
|
"rewards/frontier_coverage_15": 0.5705180168151855,
|
|
"rewards/frontier_coverage_20": 0.21805984079837798,
|
|
"rewards/frontier_coverage_25": 0.05167670994997024,
|
|
"rewards/frontier_coverage_5": 0.9366953253746033,
|
|
"rewards/frontier_ece_reward": -0.0011651593260467053,
|
|
"rewards/frontier_entropy_batch_reward": -0.9266958594322204,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.011997931264340877,
|
|
"signal/advantage_pre_scale_abs_mean": 0.011997931264340877,
|
|
"signal/advantage_pre_scale_std": 0.034786536172032353,
|
|
"signal/advantage_std": 0.034786536172032353,
|
|
"signal/brier_reward/centered_abs_mean": 0.005294179357588291,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6322916666666667,
|
|
"signal/brier_reward/group_std_mean": 0.010655418131500482,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0005294179252814501,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0005294179252814501,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1994739830493927,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8621527777777779,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23068396151065826,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019947398081421853,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019947398081421853,
|
|
"signal/format_reward/centered_abs_mean": 0.0019965277635492383,
|
|
"signal/format_reward/group_bin_occupancy": 0.1284722222222222,
|
|
"signal/format_reward/group_std_mean": 0.005294674634933471,
|
|
"signal/format_reward/group_zero_std_frac": 0.9722222208976745,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0009982638817746191,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0009982638817746191,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 6.543248455272987e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7041666666666666,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00011322678910801187,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.179060500879132e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.179060500879132e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.05585807859897614,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.83125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.07585428953170777,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005585807748138905,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005585807748138905,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05585807859897614,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.83125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07585428953170777,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005585807748138905,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005585807748138905,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0549948088824749,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8322916666666667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07462709993124009,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005499481037259102,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005499481037259102,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04226614907383919,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8364583333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.056792113929986954,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004226614907383919,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004226614907383919,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.02481546886265278,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8503472222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.03280436284840107,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024815469048917295,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024815469048917295,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.010957871191203594,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8864583333333333,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.014017502591013909,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010957871330901981,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010957871330901981,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05581053644418717,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.83125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07578854262828827,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005581053905189037,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005581053905189037,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.001521405391395092,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6770833333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0023106941487640144,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00015214053855743258,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00015214053855743258,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1287894919514656,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.35,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23831940293312073,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11111111268401146,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01287894994020462,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01287894994020462,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 1.0,
|
|
"eval_calibration/batch_distribution_entropy": 0.06643064451573505,
|
|
"eval_calibration/batch_entropy_100bins": 0.37397742139005613,
|
|
"eval_calibration/batch_entropy_10bins": 0.06643064451573505,
|
|
"eval_calibration/batch_entropy_50bins": 0.2916695775087659,
|
|
"eval_calibration/batch_uniqueness": 0.4544270833333333,
|
|
"eval_calibration/buffer_distribution_entropy": 0.2915210919064046,
|
|
"eval_calibration/buffer_entropy_100bins": 0.5463082971911503,
|
|
"eval_calibration/buffer_entropy_10bins": 0.2915210919064046,
|
|
"eval_calibration/buffer_entropy_50bins": 0.4722571553551534,
|
|
"eval_calibration/confidence_entropy": 0.09491457918095768,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0,
|
|
"eval_calibration/coverage@15%": 0.0,
|
|
"eval_calibration/coverage@20%": 0.0,
|
|
"eval_calibration/coverage@25%": 0.0,
|
|
"eval_calibration/coverage@30%": 0.0,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.023790659409756474,
|
|
"eval_calibration/mean_confidence": 0.023790659409756474,
|
|
"eval_calibration/prompt_uniqueness": 0.4544270833333333,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 1680.1666666666667,
|
|
"eval_completions/max_terminated_length": 1680.1666666666667,
|
|
"eval_completions/mean_length": 669.5234578450521,
|
|
"eval_completions/mean_terminated_length": 669.5234578450521,
|
|
"eval_completions/min_length": 236.83333333333334,
|
|
"eval_completions/min_terminated_length": 236.83333333333334,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 428317068.0,
|
|
"eval_reward": 0.9965925514698029,
|
|
"eval_reward_std": 0.024295299003521603,
|
|
"eval_rewards/accuracy_reward": 0.0,
|
|
"eval_rewards/brier_reward": 0.9971688687801361,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.4712456613779068,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.007382758737852176,
|
|
"eval_rewards/frontier_coverage_0": 0.9456672767798106,
|
|
"eval_rewards/frontier_coverage_1": 0.9456672767798106,
|
|
"eval_rewards/frontier_coverage_10": 0.8689454793930054,
|
|
"eval_rewards/frontier_coverage_15": 0.5284307897090912,
|
|
"eval_rewards/frontier_coverage_20": 0.20884332557519278,
|
|
"eval_rewards/frontier_coverage_25": 0.058971162885427475,
|
|
"eval_rewards/frontier_coverage_5": 0.9430893162886301,
|
|
"eval_rewards/frontier_ece_reward": -0.0011813394764127831,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 94.7926,
|
|
"eval_samples_per_second": 10.549,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/advantage_abs_mean": 0.016077665146440268,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.016077665146440268,
|
|
"eval_signal/advantage_pre_scale_std": 0.02679586907227834,
|
|
"eval_signal/advantage_std": 0.02679586907227834,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.004176080265703301,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.5763888888888888,
|
|
"eval_signal/brier_reward/group_std_mean": 0.009115726919844747,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00041760804257743683,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.00041760804257743683,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.22382948050896326,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7569444444444443,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.2574465771516164,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0223829485476017,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0223829485476017,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 9.999908312844734e-05,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6284722222222222,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0002476933683889608,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.2499885049995403e-06,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.2499885049995403e-06,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.050190938636660576,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8090277777777778,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.07040310216446717,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005019093786055843,
|
|
"eval_signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005019093786055843,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.050190938636660576,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8090277777777778,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.07040310216446717,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005019093786055843,
|
|
"eval_signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005019093786055843,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.048136645928025246,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8090277777777778,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.06756559945642948,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004813664670412739,
|
|
"eval_signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004813664670412739,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.03631955695648988,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8159722222222222,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.05081744554142157,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003631955711171031,
|
|
"eval_signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003631955711171031,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.021420936100184917,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8229166666666666,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.029625747663279373,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021420938040440283,
|
|
"eval_signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021420938040440283,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.011142984808733067,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8472222222222222,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.0158317390208443,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001114298482813562,
|
|
"eval_signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001114298482813562,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.050127786894639335,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8090277777777778,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.07031712743143241,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005012778720508019,
|
|
"eval_signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005012778720508019,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0016588448003555338,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.6215277777777778,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.002777168876491487,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0001658844824608726,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0001658844824608726,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.063,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.11455553726240084,
|
|
"calibration/batch_entropy_100bins": 0.45068975035694436,
|
|
"calibration/batch_entropy_10bins": 0.11455553726240084,
|
|
"calibration/batch_entropy_50bins": 0.35838251002340216,
|
|
"calibration/batch_uniqueness": 0.5393663194444445,
|
|
"calibration/buffer_distribution_entropy": 0.2649266196607571,
|
|
"calibration/buffer_entropy_100bins": 0.5311263627702383,
|
|
"calibration/buffer_entropy_10bins": 0.2649266196607571,
|
|
"calibration/buffer_entropy_50bins": 0.45441417672910356,
|
|
"calibration/confidence_entropy": 0.11045526044395729,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.029426482313059867,
|
|
"calibration/mean_confidence": 0.029426482313059867,
|
|
"calibration/prompt_uniqueness": 0.492578125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0002604166666666741,
|
|
"completions/max_length": 2353.0,
|
|
"completions/max_terminated_length": 2353.0,
|
|
"completions/mean_length": 679.0311645507812,
|
|
"completions/mean_terminated_length": 679.208349609375,
|
|
"completions/min_length": 122.0,
|
|
"completions/min_terminated_length": 189.2,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 5.876172872376628e-05,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0004,
|
|
"num_tokens": 439205459.0,
|
|
"reward": 1.0090928316116332,
|
|
"reward_std": 0.017295997962355612,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9972542285919189,
|
|
"rewards/confidence_uniqueness_reward": 0.5266831457614899,
|
|
"rewards/format_reward": 0.9996527671813965,
|
|
"rewards/frontier_aurc_reward": -0.007521875947713852,
|
|
"rewards/frontier_coverage_0": 0.9413968443870544,
|
|
"rewards/frontier_coverage_1": 0.9413968443870544,
|
|
"rewards/frontier_coverage_10": 0.8523622274398803,
|
|
"rewards/frontier_coverage_15": 0.5419311404228211,
|
|
"rewards/frontier_coverage_20": 0.21406979262828826,
|
|
"rewards/frontier_coverage_25": 0.061385908722877504,
|
|
"rewards/frontier_coverage_5": 0.9381889939308167,
|
|
"rewards/frontier_ece_reward": -0.001526193623431027,
|
|
"rewards/frontier_entropy_batch_reward": -0.9195385575294495,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.010584606975317,
|
|
"signal/advantage_pre_scale_abs_mean": 0.010584606975317,
|
|
"signal/advantage_pre_scale_std": 0.023541892506182195,
|
|
"signal/advantage_std": 0.023541892506182195,
|
|
"signal/brier_reward/centered_abs_mean": 0.0038198365829885004,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6184027777777777,
|
|
"signal/brier_reward/group_std_mean": 0.006863884162157774,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003819836885668337,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0003819836885668337,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20393891334533693,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8586805555555556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23769011199474335,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020393891260027885,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020393891260027885,
|
|
"signal/format_reward/centered_abs_mean": 0.0006618923507630825,
|
|
"signal/format_reward/group_bin_occupancy": 0.12604166666666666,
|
|
"signal/format_reward/group_std_mean": 0.0016652445774525404,
|
|
"signal/format_reward/group_zero_std_frac": 0.9916666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00033094617538154125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00033094617538154125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 3.8273249083431436e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.648263888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 6.441681398428045e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.784156260484451e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.784156260484451e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.05274165198206902,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8149305555555555,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.07166333943605423,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005274165514856577,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005274165514856577,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05274165198206902,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8149305555555555,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07166333943605423,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005274165514856577,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005274165514856577,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.04995769932866097,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.815625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06778565198183059,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004995770007371903,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004995770007371903,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.03851696029305458,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8184027777777777,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.051988587528467176,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038516961503773928,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038516961503773928,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.022696791961789132,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8371527777777779,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.03020997978746891,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002269679168239236,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002269679168239236,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.011402542889118194,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8784722222222223,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0147995101287961,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011402543168514967,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011402543168514967,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.052644898742437364,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8149305555555555,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07152151316404343,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005264490097761154,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005264490097761154,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0019458664581179618,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6496527777777777,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0029517014976590873,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00019458665046840907,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00019458665046840907,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14009268283843995,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3819444444444445,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24936519265174867,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07777777910232545,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014009268768131734,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014009268768131734,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 1.0,
|
|
"calibration/batch_distribution_entropy": 0.11726611228589778,
|
|
"calibration/batch_entropy_100bins": 0.43786275906834105,
|
|
"calibration/batch_entropy_10bins": 0.11726611228589778,
|
|
"calibration/batch_entropy_50bins": 0.3445128998256206,
|
|
"calibration/batch_uniqueness": 0.5125325520833334,
|
|
"calibration/buffer_distribution_entropy": 0.21560621581407732,
|
|
"calibration/buffer_entropy_100bins": 0.5033980975058389,
|
|
"calibration/buffer_entropy_10bins": 0.21560621581407732,
|
|
"calibration/buffer_entropy_50bins": 0.42178915870920547,
|
|
"calibration/confidence_entropy": 0.10638714985174476,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.028447378988646913,
|
|
"calibration/mean_confidence": 0.028447378988646913,
|
|
"calibration/prompt_uniqueness": 0.46137152777777773,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0002893518518518601,
|
|
"completions/max_length": 2120.6666666666665,
|
|
"completions/max_terminated_length": 2120.6666666666665,
|
|
"completions/mean_length": 666.4303995768229,
|
|
"completions/mean_terminated_length": 666.6202799479166,
|
|
"completions/min_length": 67.0,
|
|
"completions/min_terminated_length": 170.0,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 445677874.0,
|
|
"reward": 0.9926110506057739,
|
|
"reward_std": 0.01812468096613884,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.9972220460573832,
|
|
"rewards/confidence_uniqueness_reward": 0.5229232708613077,
|
|
"rewards/format_reward": 0.9997106393178304,
|
|
"rewards/frontier_aurc_reward": -0.007819035245726505,
|
|
"rewards/frontier_coverage_0": 0.9409451087315878,
|
|
"rewards/frontier_coverage_1": 0.9409451087315878,
|
|
"rewards/frontier_coverage_10": 0.7893781860669454,
|
|
"rewards/frontier_coverage_15": 0.5073119203249613,
|
|
"rewards/frontier_coverage_20": 0.18051361044247946,
|
|
"rewards/frontier_coverage_25": 0.049011316150426865,
|
|
"rewards/frontier_coverage_5": 0.9196064670880636,
|
|
"rewards/frontier_ece_reward": -0.0018021255576362212,
|
|
"rewards/frontier_entropy_batch_reward": -0.9175194899241129,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.125,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.010793880249063173,
|
|
"signal/advantage_pre_scale_abs_mean": 0.010793880249063173,
|
|
"signal/advantage_pre_scale_std": 0.023850775013367336,
|
|
"signal/advantage_std": 0.023850775013367336,
|
|
"signal/brier_reward/centered_abs_mean": 0.0038659116253256798,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6278935185185185,
|
|
"signal/brier_reward/group_std_mean": 0.0070017667797704535,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0003865911761143555,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0003865911761143555,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21364787220954895,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.828125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.24747528632481894,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021364788214365642,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021364788214365642,
|
|
"signal/format_reward/centered_abs_mean": 0.0005606192086512843,
|
|
"signal/format_reward/group_bin_occupancy": 0.1261574074074074,
|
|
"signal/format_reward/group_std_mean": 0.0016368211557467778,
|
|
"signal/format_reward/group_zero_std_frac": 0.9907407363255819,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00028030960432564217,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00028030960432564217,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 3.1314588947376855e-05,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6168981481481483,
|
|
"signal/frontier_aurc_reward/group_std_mean": 5.595714052712234e-05,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.914323466839657e-07,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.914323466839657e-07,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.05400566880901655,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8234953703703702,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.07307405769824982,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005400567160298427,
|
|
"signal/frontier_coverage_0/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005400567160298427,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05400566880901655,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8234953703703702,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07307405769824982,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005400567160298427,
|
|
"signal/frontier_coverage_1/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005400567160298427,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.048948156336943306,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8246527777777778,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06624157354235649,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004894815851002932,
|
|
"signal/frontier_coverage_10/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004894815851002932,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.037971017261346184,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8252314814814815,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.05115088944633802,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003797101710612575,
|
|
"signal/frontier_coverage_15/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003797101710612575,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.021584200983246166,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8385416666666666,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.02855382238825162,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021584201992178955,
|
|
"signal/frontier_coverage_20/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021584201992178955,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.010639886682232222,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8790509259259259,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.013642584905028343,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010639886216570933,
|
|
"signal/frontier_coverage_25/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010639886216570933,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05342509597539902,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8234953703703702,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07227163016796112,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005342509442319472,
|
|
"signal/frontier_coverage_5/weight": 0.10000000149011612,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005342509442319472,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.002326072504123052,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6481481481481483,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003536020793641607,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00023260726690447578,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00023260726690447578,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.14257381359736124,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3755787037037037,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24763726194699606,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0879629651705424,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014257381359736124,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014257381359736124,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.004933846771494315,
|
|
"train_runtime": 36324.5396,
|
|
"train_samples_per_second": 0.413,
|
|
"train_steps_per_second": 0.006
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 445677874,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|