5721 lines
354 KiB
JSON
5721 lines
354 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.514086638541351,
|
|
"calibration/batch_distribution_entropy": 0.27599249583875307,
|
|
"calibration/confidence_entropy": 0.2228992812774721,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4635714419377363,
|
|
"calibration/mean_confidence": 0.9145515300719154,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0203125,
|
|
"completions/max_length": 4017.2,
|
|
"completions/max_terminated_length": 4017.2,
|
|
"completions/mean_length": 517.5268188476563,
|
|
"completions/mean_terminated_length": 528.2714599609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.004794155713170767,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.008,
|
|
"num_tokens": 9076117.0,
|
|
"reward": 0.5741718530654907,
|
|
"reward_std": 0.5221742153167724,
|
|
"rewards/accuracy_reward": 0.2621527761220932,
|
|
"rewards/brier_reward": 0.31355856657028197,
|
|
"rewards/confidence_uniqueness_reward": 0.288547545671463,
|
|
"rewards/format_reward": 0.5970486044883728,
|
|
"rewards/frontier_aurc_reward": 0.27689927220344546,
|
|
"rewards/frontier_coverage_1": 0.27689927220344546,
|
|
"rewards/frontier_coverage_10": 0.27689927220344546,
|
|
"rewards/frontier_coverage_15": 0.27689927220344546,
|
|
"rewards/frontier_coverage_20": 0.27689927220344546,
|
|
"rewards/frontier_coverage_25": 0.27689927220344546,
|
|
"rewards/frontier_coverage_5": 0.27689927220344546,
|
|
"rewards/frontier_ece_reward": 0.27689927220344546,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3104600667953491,
|
|
"signal/accuracy_reward/group_std_mean": 0.3717172920703888,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.07777777910232545,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15523003339767455,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15523003339767455,
|
|
"signal/advantage_abs_mean": 0.4487810075283051,
|
|
"signal/advantage_pre_scale_abs_mean": 0.4487810075283051,
|
|
"signal/advantage_pre_scale_std": 0.5276062607765197,
|
|
"signal/advantage_std": 0.5276062607765197,
|
|
"signal/brier_reward/centered_abs_mean": 0.32066049575805666,
|
|
"signal/brier_reward/group_std_mean": 0.3748934090137482,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04008256196975708,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.04008256196975708,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2354918897151947,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2874836504459381,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029436486214399336,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029436486214399336,
|
|
"signal/format_reward/centered_abs_mean": 0.4400065064430237,
|
|
"signal/format_reward/group_std_mean": 0.47453489899635315,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.22000325322151185,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.22000325322151185,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.31218199133872987,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3707219660282135,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.31218199133872987,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3707219660282135,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.31218199133872987,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3707219660282135,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.31218199133872987,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3707219660282135,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.31218199133872987,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3707219660282135,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.31218199133872987,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3707219660282135,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.31218199133872987,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3707219660282135,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005588056985288858,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.31218199133872987,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3707219660282135,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.039022748917341234,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.039022748917341234,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5280547395080012,
|
|
"calibration/batch_distribution_entropy": 0.25912112616058935,
|
|
"calibration/confidence_entropy": 0.22271742129617608,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.48657832278132584,
|
|
"calibration/mean_confidence": 0.921991283906116,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017881944444444443,
|
|
"completions/max_length": 4016.4,
|
|
"completions/max_terminated_length": 4016.4,
|
|
"completions/mean_length": 476.5828063964844,
|
|
"completions/mean_terminated_length": 485.360302734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.4,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.024597520008683205,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 0.0019,
|
|
"num_tokens": 17649071.0,
|
|
"reward": 0.667762839794159,
|
|
"reward_std": 0.48139882683753965,
|
|
"rewards/accuracy_reward": 0.2855034708976746,
|
|
"rewards/brier_reward": 0.35117203593254087,
|
|
"rewards/confidence_uniqueness_reward": 0.3601065635681152,
|
|
"rewards/format_reward": 0.7209201455116272,
|
|
"rewards/frontier_aurc_reward": 0.30220218896865847,
|
|
"rewards/frontier_coverage_1": 0.30220218896865847,
|
|
"rewards/frontier_coverage_10": 0.30220218896865847,
|
|
"rewards/frontier_coverage_15": 0.30220218896865847,
|
|
"rewards/frontier_coverage_20": 0.30220218896865847,
|
|
"rewards/frontier_coverage_25": 0.30220218896865847,
|
|
"rewards/frontier_coverage_5": 0.30220218896865847,
|
|
"rewards/frontier_ece_reward": 0.30220218896865847,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3181260824203491,
|
|
"signal/accuracy_reward/group_std_mean": 0.37762491106987,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15906304121017456,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15906304121017456,
|
|
"signal/advantage_abs_mean": 0.3990109622478485,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3990109622478485,
|
|
"signal/advantage_pre_scale_std": 0.4871573269367218,
|
|
"signal/advantage_std": 0.4871573269367218,
|
|
"signal/brier_reward/centered_abs_mean": 0.3128700017929077,
|
|
"signal/brier_reward/group_std_mean": 0.3669971525669098,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03910875022411346,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03910875022411346,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2221683979034424,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.27797139883041383,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0277710497379303,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0277710497379303,
|
|
"signal/format_reward/centered_abs_mean": 0.34768337607383726,
|
|
"signal/format_reward/group_std_mean": 0.41295074224472045,
|
|
"signal/format_reward/group_zero_std_frac": 0.01388888917863369,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17384168803691863,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.17384168803691863,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.31191812753677367,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36915679574012755,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.31191812753677367,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.36915679574012755,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.31191812753677367,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36915679574012755,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.31191812753677367,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36915679574012755,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.31191812753677367,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36915679574012755,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.31191812753677367,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36915679574012755,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.31191812753677367,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.36915679574012755,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0055833343416452404,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.31191812753677367,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36915679574012755,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03898976594209671,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03898976594209671,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.567845924200485,
|
|
"calibration/batch_distribution_entropy": 0.3045943413639345,
|
|
"calibration/confidence_entropy": 0.24763369243483666,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5280282173660021,
|
|
"calibration/mean_confidence": 0.9096602757540054,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009895833333333326,
|
|
"completions/max_length": 3921.8,
|
|
"completions/max_terminated_length": 3921.8,
|
|
"completions/mean_length": 413.9700561523438,
|
|
"completions/mean_terminated_length": 418.13662109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 58.4,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.0017141081625595689,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0112,
|
|
"num_tokens": 25519990.0,
|
|
"reward": 0.8229876399040222,
|
|
"reward_std": 0.37621534466743467,
|
|
"rewards/accuracy_reward": 0.3091145813465118,
|
|
"rewards/brier_reward": 0.4167811870574951,
|
|
"rewards/confidence_uniqueness_reward": 0.5070087611675262,
|
|
"rewards/format_reward": 0.9366319298744201,
|
|
"rewards/frontier_aurc_reward": 0.338156646490097,
|
|
"rewards/frontier_coverage_1": 0.338156646490097,
|
|
"rewards/frontier_coverage_10": 0.338156646490097,
|
|
"rewards/frontier_coverage_15": 0.338156646490097,
|
|
"rewards/frontier_coverage_20": 0.338156646490097,
|
|
"rewards/frontier_coverage_25": 0.338156646490097,
|
|
"rewards/frontier_coverage_5": 0.338156646490097,
|
|
"rewards/frontier_ece_reward": 0.338156646490097,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.32050238847732543,
|
|
"signal/accuracy_reward/group_std_mean": 0.37917629480361936,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.07777778059244156,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16025119423866271,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16025119423866271,
|
|
"signal/advantage_abs_mean": 0.30312079191207886,
|
|
"signal/advantage_pre_scale_abs_mean": 0.30312079191207886,
|
|
"signal/advantage_pre_scale_std": 0.3861180067062378,
|
|
"signal/advantage_std": 0.3861180067062378,
|
|
"signal/brier_reward/centered_abs_mean": 0.29925207495689393,
|
|
"signal/brier_reward/group_std_mean": 0.35090850591659545,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03740650936961174,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03740650936961174,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18491619527339936,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23462865352630616,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02311452440917492,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02311452440917492,
|
|
"signal/format_reward/centered_abs_mean": 0.10967882126569747,
|
|
"signal/format_reward/group_std_mean": 0.19272871911525727,
|
|
"signal/format_reward/group_zero_std_frac": 0.2833333410322666,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05483941063284874,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.05483941063284874,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3117563307285309,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36737927198410036,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3117563307285309,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.36737927198410036,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3117563307285309,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36737927198410036,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3117563307285309,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36737927198410036,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3117563307285309,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36737927198410036,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3117563307285309,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36737927198410036,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3117563307285309,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.36737927198410036,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005580438114702701,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3117563307285309,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36737927198410036,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03896954134106636,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03896954134106636,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.47143046891943,
|
|
"calibration/batch_distribution_entropy": 0.4095787632848159,
|
|
"calibration/buffer_distribution_entropy": 0.3143981234334837,
|
|
"calibration/confidence_entropy": 0.31617773106851604,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.39434792860496753,
|
|
"calibration/mean_confidence": 0.8800821491423632,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009375,
|
|
"completions/max_length": 4021.8,
|
|
"completions/max_terminated_length": 4021.8,
|
|
"completions/mean_length": 432.70338134765626,
|
|
"completions/mean_terminated_length": 436.8342529296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.001050017075613141,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.0105,
|
|
"num_tokens": 33618429.0,
|
|
"reward": 0.8896676540374756,
|
|
"reward_std": 0.28481470942497256,
|
|
"rewards/accuracy_reward": 0.4251736104488373,
|
|
"rewards/brier_reward": 0.5469748020172119,
|
|
"rewards/confidence_uniqueness_reward": 0.5929476499557496,
|
|
"rewards/format_reward": 0.985937488079071,
|
|
"rewards/frontier_aurc_reward": 0.16746631124988198,
|
|
"rewards/frontier_coverage_1": 0.1779847363010049,
|
|
"rewards/frontier_coverage_10": 0.1779847363010049,
|
|
"rewards/frontier_coverage_15": 0.1779847363010049,
|
|
"rewards/frontier_coverage_20": 0.1779847363010049,
|
|
"rewards/frontier_coverage_25": 0.1779847363010049,
|
|
"rewards/frontier_coverage_5": 0.1779847363010049,
|
|
"rewards/frontier_ece_reward": 0.15606855656951665,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2997070372104645,
|
|
"signal/accuracy_reward/group_std_mean": 0.3657579779624939,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.08055555820465088,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14985351860523224,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14985351860523224,
|
|
"signal/advantage_abs_mean": 0.2284111499786377,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2284111499786377,
|
|
"signal/advantage_pre_scale_std": 0.2946593701839447,
|
|
"signal/advantage_std": 0.2946593701839447,
|
|
"signal/brier_reward/centered_abs_mean": 0.2612148314714432,
|
|
"signal/brier_reward/group_std_mean": 0.31739285588264465,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0326518539339304,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0326518539339304,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.186856010556221,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.22159543633460999,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023357001319527625,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023357001319527625,
|
|
"signal/format_reward/centered_abs_mean": 0.02544487789273262,
|
|
"signal/format_reward/group_std_mean": 0.05656049475073814,
|
|
"signal/format_reward/group_zero_std_frac": 0.7361111283302307,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01272243894636631,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01272243894636631,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.11768018077127636,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.14374305196106435,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0021064750850200652,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0021064750850200652,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13446774668991565,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17186392471194267,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13446774668991565,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17186392471194267,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13446774668991565,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17186392471194267,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13446774668991565,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17186392471194267,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13446774668991565,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17186392471194267,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13446774668991565,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17186392471194267,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024069725011941047,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2141528308391571,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.2606072276830673,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.026769103854894637,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.026769103854894637,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36235694806855867,
|
|
"calibration/batch_distribution_entropy": 0.5577564400694477,
|
|
"calibration/buffer_distribution_entropy": 0.3620251114970633,
|
|
"calibration/confidence_entropy": 0.3781988685889721,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.020526315789473684,
|
|
"calibration/coverage@15%": 0.020526315789473684,
|
|
"calibration/coverage@20%": 0.07789473684210527,
|
|
"calibration/coverage@25%": 0.1470138210247103,
|
|
"calibration/coverage@30%": 0.39850523935370885,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.27206986145822254,
|
|
"calibration/mean_confidence": 0.8404789311929516,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00980902777777779,
|
|
"completions/max_length": 3977.8,
|
|
"completions/max_terminated_length": 3977.8,
|
|
"completions/mean_length": 482.4750061035156,
|
|
"completions/mean_terminated_length": 487.28203735351565,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 79.6,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.020144827663898468,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.0045,
|
|
"num_tokens": 42300989.0,
|
|
"reward": 0.9261262536048889,
|
|
"reward_std": 0.2307106077671051,
|
|
"rewards/accuracy_reward": 0.5281249940395355,
|
|
"rewards/brier_reward": 0.6501283884048462,
|
|
"rewards/confidence_uniqueness_reward": 0.6791275620460511,
|
|
"rewards/format_reward": 0.9868055582046509,
|
|
"rewards/frontier_aurc_reward": -0.004348812019452452,
|
|
"rewards/frontier_coverage_1": 0.00460605913103791,
|
|
"rewards/frontier_coverage_10": 0.00460605913103791,
|
|
"rewards/frontier_coverage_15": 0.00460605913103791,
|
|
"rewards/frontier_coverage_20": 0.00460605913103791,
|
|
"rewards/frontier_coverage_25": 0.00460605913103791,
|
|
"rewards/frontier_coverage_5": 0.00460605913103791,
|
|
"rewards/frontier_ece_reward": 0.01669727308326401,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2728081583976746,
|
|
"signal/accuracy_reward/group_std_mean": 0.33785536885261536,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.12222222238779068,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1364040791988373,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1364040791988373,
|
|
"signal/advantage_abs_mean": 0.1812939614057541,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1812939614057541,
|
|
"signal/advantage_pre_scale_std": 0.24797289669513703,
|
|
"signal/advantage_std": 0.24797289669513703,
|
|
"signal/brier_reward/centered_abs_mean": 0.21097786724567413,
|
|
"signal/brier_reward/group_std_mean": 0.2636861175298691,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026372233405709267,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.026372233405709267,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1371775045990944,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1669593095779419,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0171471880748868,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0171471880748868,
|
|
"signal/format_reward/centered_abs_mean": 0.023567708767950536,
|
|
"signal/format_reward/group_std_mean": 0.047248493134975436,
|
|
"signal/format_reward/group_zero_std_frac": 0.7972222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011783854383975268,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011783854383975268,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028534052427858113,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004276081500574946,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1075952796963975e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1075952796963975e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.03978681042790413,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.06365430131554603,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.03978681042790413,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06365430131554603,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.03978681042790413,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06365430131554603,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.03978681042790413,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06365430131554603,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.03978681042790413,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06365430131554603,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.03978681042790413,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.06365430131554603,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007121838862076402,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1267393171787262,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.1571869283914566,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.015842414647340774,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.015842414647340774,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27921286600632367,
|
|
"calibration/batch_distribution_entropy": 0.6577298107673348,
|
|
"calibration/buffer_distribution_entropy": 0.44022902249815105,
|
|
"calibration/confidence_entropy": 0.47141779391590316,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.007065217391304347,
|
|
"calibration/coverage@15%": 0.08577783589696426,
|
|
"calibration/coverage@20%": 0.1485360821538392,
|
|
"calibration/coverage@25%": 0.23173721030472935,
|
|
"calibration/coverage@30%": 0.5571452261540231,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.13807863251820546,
|
|
"calibration/mean_confidence": 0.786612541696696,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017187499999999977,
|
|
"completions/max_length": 4053.8,
|
|
"completions/max_terminated_length": 4053.8,
|
|
"completions/mean_length": 555.9796997070313,
|
|
"completions/mean_terminated_length": 565.7698974609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 118.4,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.0005177477723918855,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.011,
|
|
"num_tokens": 51815795.0,
|
|
"reward": 0.9665445923805237,
|
|
"reward_std": 0.20279234647750854,
|
|
"rewards/accuracy_reward": 0.5907986044883728,
|
|
"rewards/brier_reward": 0.7121957659721374,
|
|
"rewards/confidence_uniqueness_reward": 0.7083804368972778,
|
|
"rewards/format_reward": 0.9802083253860474,
|
|
"rewards/frontier_aurc_reward": -0.0033660000655800102,
|
|
"rewards/frontier_coverage_1": -0.005025790445506573,
|
|
"rewards/frontier_coverage_10": -0.005025790445506573,
|
|
"rewards/frontier_coverage_15": -0.005025790445506573,
|
|
"rewards/frontier_coverage_20": -0.005025790445506573,
|
|
"rewards/frontier_coverage_25": -0.005025790445506573,
|
|
"rewards/frontier_coverage_5": -0.005025790445506573,
|
|
"rewards/frontier_ece_reward": 0.032553022354841234,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2300238698720932,
|
|
"signal/accuracy_reward/group_std_mean": 0.2947371512651443,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.18888889104127884,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1150119349360466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1150119349360466,
|
|
"signal/advantage_abs_mean": 0.15309852957725525,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15309852957725525,
|
|
"signal/advantage_pre_scale_std": 0.2273882269859314,
|
|
"signal/advantage_std": 0.2273882269859314,
|
|
"signal/brier_reward/centered_abs_mean": 0.16554278135299683,
|
|
"signal/brier_reward/group_std_mean": 0.21267394721508026,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020692847669124603,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020692847669124603,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11078131943941116,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.14092794060707092,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013847664929926395,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013847664929926395,
|
|
"signal/format_reward/centered_abs_mean": 0.03274739608168602,
|
|
"signal/format_reward/group_std_mean": 0.06030413955450058,
|
|
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01637369804084301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01637369804084301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018913287669420243,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029469260945916174,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.385478412383236e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.385478412383236e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.05050327777862549,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.07173062860965729,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.05050327777862549,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07173062860965729,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.05050327777862549,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07173062860965729,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05050327777862549,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07173062860965729,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05050327777862549,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07173062860965729,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.05050327777862549,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.07173062860965729,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009040086762979627,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08041608110070228,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10180892795324326,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010052010137587785,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010052010137587785,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24546937930520882,
|
|
"calibration/batch_distribution_entropy": 0.7074476039153248,
|
|
"calibration/buffer_distribution_entropy": 0.5221484411007967,
|
|
"calibration/confidence_entropy": 0.5234833271299201,
|
|
"calibration/coverage@0%": 0.003183023872679045,
|
|
"calibration/coverage@1%": 0.003183023872679045,
|
|
"calibration/coverage@10%": 0.022281167108753316,
|
|
"calibration/coverage@15%": 0.13367375062180317,
|
|
"calibration/coverage@20%": 0.2989711554240587,
|
|
"calibration/coverage@25%": 0.547051785170009,
|
|
"calibration/coverage@30%": 0.8145723684210526,
|
|
"calibration/coverage@5%": 0.022281167108753316,
|
|
"calibration/ece": 0.09596886211635568,
|
|
"calibration/mean_confidence": 0.7358937641480917,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01571180555555558,
|
|
"completions/max_length": 4022.0,
|
|
"completions/max_terminated_length": 4022.0,
|
|
"completions/mean_length": 609.8512084960937,
|
|
"completions/mean_terminated_length": 619.6409912109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 151.6,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.0005236866418272257,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0102,
|
|
"num_tokens": 61918721.0,
|
|
"reward": 0.9899388790130615,
|
|
"reward_std": 0.1820593684911728,
|
|
"rewards/accuracy_reward": 0.6299479126930236,
|
|
"rewards/brier_reward": 0.7458477735519409,
|
|
"rewards/confidence_uniqueness_reward": 0.7157063841819763,
|
|
"rewards/format_reward": 0.98046875,
|
|
"rewards/frontier_aurc_reward": -0.002861540112644434,
|
|
"rewards/frontier_coverage_1": -0.013961865846067668,
|
|
"rewards/frontier_coverage_10": -0.013961865846067668,
|
|
"rewards/frontier_coverage_15": -0.013961865846067668,
|
|
"rewards/frontier_coverage_20": -0.013961865846067668,
|
|
"rewards/frontier_coverage_25": -0.013961865846067668,
|
|
"rewards/frontier_coverage_5": -0.013961865846067668,
|
|
"rewards/frontier_ece_reward": 0.02869575172662735,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.21008571982383728,
|
|
"signal/accuracy_reward/group_std_mean": 0.2665324449539185,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2805555611848831,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10504285991191864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10504285991191864,
|
|
"signal/advantage_abs_mean": 0.13846542537212372,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13846542537212372,
|
|
"signal/advantage_pre_scale_std": 0.20890699625015258,
|
|
"signal/advantage_std": 0.20890699625015258,
|
|
"signal/brier_reward/centered_abs_mean": 0.14192103445529938,
|
|
"signal/brier_reward/group_std_mean": 0.1835268259048462,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017740129306912423,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017740129306912423,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1185295969247818,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.14487815797328948,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014816199615597724,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014816199615597724,
|
|
"signal/format_reward/centered_abs_mean": 0.03253580778837204,
|
|
"signal/format_reward/group_std_mean": 0.05597815439105034,
|
|
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01626790389418602,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01626790389418602,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001540156383998692,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023954100906848907,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.756879803200718e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.756879803200718e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.06873494014143944,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.09156568795442581,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.06873494014143944,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.09156568795442581,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06873494014143944,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09156568795442581,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06873494014143944,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09156568795442581,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06873494014143944,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09156568795442581,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.06873494014143944,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.09156568795442581,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012303554220125079,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05747309401631355,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.074637171626091,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007184136752039194,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007184136752039194,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27279581724387775,
|
|
"calibration/batch_distribution_entropy": 0.7161346190573996,
|
|
"calibration/buffer_distribution_entropy": 0.5831439661286104,
|
|
"calibration/confidence_entropy": 0.5275154650697746,
|
|
"calibration/coverage@0%": 0.004199475065616798,
|
|
"calibration/coverage@1%": 0.004199475065616798,
|
|
"calibration/coverage@10%": 0.023622047244094488,
|
|
"calibration/coverage@15%": 0.06826086053227515,
|
|
"calibration/coverage@20%": 0.17686038278103913,
|
|
"calibration/coverage@25%": 0.3879944798344658,
|
|
"calibration/coverage@30%": 0.5670690424419453,
|
|
"calibration/coverage@5%": 0.004199475065616798,
|
|
"calibration/ece": 0.09923312232529506,
|
|
"calibration/mean_confidence": 0.7262206563584646,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014930555555555558,
|
|
"completions/max_length": 3704.8,
|
|
"completions/max_terminated_length": 3704.8,
|
|
"completions/mean_length": 634.3372436523438,
|
|
"completions/mean_terminated_length": 643.9880004882813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 165.8,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.0005042441189289093,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0111,
|
|
"num_tokens": 72345806.0,
|
|
"reward": 1.0059186458587646,
|
|
"reward_std": 0.17311942875385283,
|
|
"rewards/accuracy_reward": 0.6474826335906982,
|
|
"rewards/brier_reward": 0.7549214363098145,
|
|
"rewards/confidence_uniqueness_reward": 0.7580878973007202,
|
|
"rewards/format_reward": 0.9828124880790711,
|
|
"rewards/frontier_aurc_reward": -0.002668565092608333,
|
|
"rewards/frontier_coverage_1": -0.018375001149252057,
|
|
"rewards/frontier_coverage_10": -0.018375001149252057,
|
|
"rewards/frontier_coverage_15": -0.018375001149252057,
|
|
"rewards/frontier_coverage_20": -0.018375001149252057,
|
|
"rewards/frontier_coverage_25": -0.018375001149252057,
|
|
"rewards/frontier_coverage_5": -0.018375001149252057,
|
|
"rewards/frontier_ece_reward": 0.029329166933894157,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19369032084941865,
|
|
"signal/accuracy_reward/group_std_mean": 0.25328629910945893,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2972222208976746,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09684516042470932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09684516042470932,
|
|
"signal/advantage_abs_mean": 0.1273159146308899,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1273159146308899,
|
|
"signal/advantage_pre_scale_std": 0.2019648015499115,
|
|
"signal/advantage_std": 0.2019648015499115,
|
|
"signal/brier_reward/centered_abs_mean": 0.141173791885376,
|
|
"signal/brier_reward/group_std_mean": 0.18503097891807557,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017646723985672,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017646723985672,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10431279838085175,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.13210797309875488,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013039099797606469,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013039099797606469,
|
|
"signal/format_reward/centered_abs_mean": 0.02906901091337204,
|
|
"signal/format_reward/group_std_mean": 0.05360684543848038,
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01453450545668602,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01453450545668602,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019563521724194287,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00321835745126009,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5018703420064413e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5018703420064413e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08069958090782166,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.10802106261253357,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08069958090782166,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.10802106261253357,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08069958090782166,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10802106261253357,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08069958090782166,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10802106261253357,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08069958090782166,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10802106261253357,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08069958090782166,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.10802106261253357,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001444522407837212,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05297911018133163,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07072616964578629,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006622388772666454,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006622388772666454,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21362556514859965,
|
|
"calibration/batch_distribution_entropy": 0.7249410130483167,
|
|
"calibration/buffer_distribution_entropy": 0.6183385204926428,
|
|
"calibration/confidence_entropy": 0.4615105516463262,
|
|
"calibration/coverage@0%": 0.015706806282722512,
|
|
"calibration/coverage@1%": 0.015706806282722512,
|
|
"calibration/coverage@10%": 0.12766332802185293,
|
|
"calibration/coverage@15%": 0.26664864557250173,
|
|
"calibration/coverage@20%": 0.44956878151185337,
|
|
"calibration/coverage@25%": 0.7303769022780143,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.015706806282722512,
|
|
"calibration/ece": 0.10484668118363438,
|
|
"calibration/mean_confidence": 0.7590440665093695,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017013888888888884,
|
|
"completions/max_length": 3743.2,
|
|
"completions/max_terminated_length": 3743.2,
|
|
"completions/mean_length": 654.5295166015625,
|
|
"completions/mean_terminated_length": 665.859033203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.00047160135000012815,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.0147,
|
|
"num_tokens": 83021250.0,
|
|
"reward": 1.0147064447402954,
|
|
"reward_std": 0.1708086371421814,
|
|
"rewards/accuracy_reward": 0.6501736164093017,
|
|
"rewards/brier_reward": 0.7591644525527954,
|
|
"rewards/confidence_uniqueness_reward": 0.8012210249900817,
|
|
"rewards/format_reward": 0.9823784589767456,
|
|
"rewards/frontier_aurc_reward": -0.0025879590306431056,
|
|
"rewards/frontier_coverage_1": -0.010252609569579362,
|
|
"rewards/frontier_coverage_10": -0.010252609569579362,
|
|
"rewards/frontier_coverage_15": -0.010252609569579362,
|
|
"rewards/frontier_coverage_20": -0.010252609569579362,
|
|
"rewards/frontier_coverage_25": -0.010252609569579362,
|
|
"rewards/frontier_coverage_5": -0.010252609569579362,
|
|
"rewards/frontier_ece_reward": 0.036237184703350064,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19266493022441863,
|
|
"signal/accuracy_reward/group_std_mean": 0.2547257900238037,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2805555611848831,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09633246511220932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09633246511220932,
|
|
"signal/advantage_abs_mean": 0.12555780559778212,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12555780559778212,
|
|
"signal/advantage_pre_scale_std": 0.19953626692295073,
|
|
"signal/advantage_std": 0.19953626692295073,
|
|
"signal/brier_reward/centered_abs_mean": 0.14807810485363007,
|
|
"signal/brier_reward/group_std_mean": 0.19255775809288025,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01850976310670376,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01850976310670376,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0964215949177742,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.12510152906179428,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012052699364721775,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012052699364721775,
|
|
"signal/format_reward/centered_abs_mean": 0.02882486991584301,
|
|
"signal/format_reward/group_std_mean": 0.05173756778240204,
|
|
"signal/format_reward/group_zero_std_frac": 0.794444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014412434957921504,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.014412434957921504,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028790227603167295,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004534664563834667,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.153450401849113e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.153450401849113e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08745122700929642,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11857426017522812,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08745122700929642,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11857426017522812,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08745122700929642,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11857426017522812,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08745122700929642,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11857426017522812,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08745122700929642,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11857426017522812,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08745122700929642,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11857426017522812,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015653769718483091,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05844959244132042,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07468613833189011,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007306199055165052,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007306199055165052,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3984856338347365,
|
|
"calibration/batch_distribution_entropy": 0.7660177852564631,
|
|
"calibration/buffer_distribution_entropy": 0.6392927797521206,
|
|
"calibration/confidence_entropy": 0.4476935003558987,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.02168021680216802,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.22976067826983196,
|
|
"calibration/mean_confidence": 0.7545236404911386,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015885416666666673,
|
|
"completions/max_length": 3047.4,
|
|
"completions/max_terminated_length": 3047.4,
|
|
"completions/mean_length": 653.1490600585937,
|
|
"completions/mean_terminated_length": 663.627978515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 182.2,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.0005696824518963695,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.0146,
|
|
"num_tokens": 93643127.0,
|
|
"reward": 1.014067542552948,
|
|
"reward_std": 0.163968026638031,
|
|
"rewards/accuracy_reward": 0.6374131798744201,
|
|
"rewards/brier_reward": 0.7471580624580383,
|
|
"rewards/confidence_uniqueness_reward": 0.8459334373474121,
|
|
"rewards/format_reward": 0.9841145873069763,
|
|
"rewards/frontier_aurc_reward": -0.002862738911062479,
|
|
"rewards/frontier_coverage_1": -0.0022450896329246463,
|
|
"rewards/frontier_coverage_10": -0.0022450896329246463,
|
|
"rewards/frontier_coverage_15": -0.0022450896329246463,
|
|
"rewards/frontier_coverage_20": -0.0022450896329246463,
|
|
"rewards/frontier_coverage_25": -0.0022450896329246463,
|
|
"rewards/frontier_coverage_5": -0.0022450896329246463,
|
|
"rewards/frontier_ece_reward": 0.03567677363753319,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.181884765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.24014606773853303,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3194444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0909423828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0909423828125,
|
|
"signal/advantage_abs_mean": 0.12030979841947556,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12030979841947556,
|
|
"signal/advantage_pre_scale_std": 0.19647813737392425,
|
|
"signal/advantage_std": 0.19647813737392425,
|
|
"signal/brier_reward/centered_abs_mean": 0.15161194503307343,
|
|
"signal/brier_reward/group_std_mean": 0.1975580185651779,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01895149312913418,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01895149312913418,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08682139664888382,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11508260518312455,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010852674581110477,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010852674581110477,
|
|
"signal/format_reward/centered_abs_mean": 0.02594943605363369,
|
|
"signal/format_reward/group_std_mean": 0.048407307267189024,
|
|
"signal/format_reward/group_zero_std_frac": 0.7972222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012974718026816845,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012974718026816845,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035359882283955814,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005427685286849737,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.329418683890254e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.329418683890254e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08081827610731125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11278729438781739,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08081827610731125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11278729438781739,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08081827610731125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11278729438781739,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08081827610731125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11278729438781739,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08081827610731125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11278729438781739,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08081827610731125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11278729438781739,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014466470805928112,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05788221508264542,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07291264832019806,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007235276885330677,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007235276885330677,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.22528452463011495,
|
|
"eval_calibration/batch_distribution_entropy": 0.7327676302222469,
|
|
"eval_calibration/buffer_distribution_entropy": 0.6531455892362769,
|
|
"eval_calibration/confidence_entropy": 0.45147091813373913,
|
|
"eval_calibration/coverage@0%": 0.12339829749103944,
|
|
"eval_calibration/coverage@1%": 0.12339829749103944,
|
|
"eval_calibration/coverage@10%": 0.2075716845878136,
|
|
"eval_calibration/coverage@15%": 0.2720990143369176,
|
|
"eval_calibration/coverage@20%": 0.4371975806451613,
|
|
"eval_calibration/coverage@25%": 0.6719758064516129,
|
|
"eval_calibration/coverage@30%": 0.9375,
|
|
"eval_calibration/coverage@5%": 0.12339829749103944,
|
|
"eval_calibration/ece": 0.1648209544670118,
|
|
"eval_calibration/mean_confidence": 0.7559882185052436,
|
|
"eval_completions/clipped_ratio": 0.018229166666666668,
|
|
"eval_completions/max_length": 2276.0,
|
|
"eval_completions/max_terminated_length": 2276.0,
|
|
"eval_completions/mean_length": 635.2345784505209,
|
|
"eval_completions/mean_terminated_length": 646.9802856445312,
|
|
"eval_completions/min_length": 51.666666666666664,
|
|
"eval_completions/min_terminated_length": 214.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 93643127.0,
|
|
"eval_reward": 1.0190295179684956,
|
|
"eval_reward_std": 0.29277849197387695,
|
|
"eval_rewards/accuracy_reward": 0.65625,
|
|
"eval_rewards/brier_reward": 0.7608515123526255,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8176768521467844,
|
|
"eval_rewards/format_reward": 0.980034718910853,
|
|
"eval_rewards/frontier_aurc_reward": -0.002395169634837657,
|
|
"eval_rewards/frontier_coverage_1": -0.006012833837303333,
|
|
"eval_rewards/frontier_coverage_10": -0.006012833837303333,
|
|
"eval_rewards/frontier_coverage_15": -0.006012833837303333,
|
|
"eval_rewards/frontier_coverage_20": -0.006012833837303333,
|
|
"eval_rewards/frontier_coverage_25": -0.006012833837303333,
|
|
"eval_rewards/frontier_coverage_5": -0.006012833837303333,
|
|
"eval_rewards/frontier_ece_reward": 0.034077832475304604,
|
|
"eval_runtime": 205.5337,
|
|
"eval_samples_per_second": 4.865,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4380425264437993,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4743858923514684,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21902126322189966,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21902126322189966,
|
|
"eval_signal/advantage_abs_mean": 0.2553912376364072,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2553912376364072,
|
|
"eval_signal/advantage_pre_scale_std": 0.29115622242291767,
|
|
"eval_signal/advantage_std": 0.29115622242291767,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22910910844802856,
|
|
"eval_signal/brier_reward/group_std_mean": 0.28499897321065265,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02863863855600357,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02863863855600357,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10962619632482529,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.15229293455680212,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01370327454060316,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01370327454060316,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.03803168454517921,
|
|
"eval_signal/format_reward/group_std_mean": 0.09500421459476154,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.5277777910232544,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.019015842272589605,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.019015842272589605,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033296199593072138,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005719099193811417,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9600197346298955e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9600197346298955e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.10264075919985771,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.16208957880735397,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.10264075919985771,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.16208957880735397,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.10264075919985771,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.16208957880735397,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10264075919985771,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.16208957880735397,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.10264075919985771,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.16208957880735397,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.10264075919985771,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.16208957880735397,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001837269403040409,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.06874044549961884,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.08980573217074077,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008592555687452355,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008592555687452355,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30905254136426186,
|
|
"calibration/batch_distribution_entropy": 0.8075596443590124,
|
|
"calibration/buffer_distribution_entropy": 0.6633267710604588,
|
|
"calibration/confidence_entropy": 0.49110166338550343,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.3168765708000028,
|
|
"calibration/coverage@25%": 0.5018372703412074,
|
|
"calibration/coverage@30%": 0.5417322834645669,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.15189953369437614,
|
|
"calibration/mean_confidence": 0.7286333664825811,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015885416666666673,
|
|
"completions/max_length": 3439.6,
|
|
"completions/max_terminated_length": 3439.6,
|
|
"completions/mean_length": 658.5401977539062,
|
|
"completions/mean_terminated_length": 669.3446044921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 142.2,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.00043861696030944586,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.0138,
|
|
"num_tokens": 104310086.0,
|
|
"reward": 1.0262351274490356,
|
|
"reward_std": 0.15907953977584838,
|
|
"rewards/accuracy_reward": 0.6447048544883728,
|
|
"rewards/brier_reward": 0.7629892230033875,
|
|
"rewards/confidence_uniqueness_reward": 0.9056627631187439,
|
|
"rewards/format_reward": 0.9841145873069763,
|
|
"rewards/frontier_aurc_reward": -0.002367356652393937,
|
|
"rewards/frontier_coverage_1": -0.0021971354028210042,
|
|
"rewards/frontier_coverage_10": -0.0021971354028210042,
|
|
"rewards/frontier_coverage_15": -0.0021971354028210042,
|
|
"rewards/frontier_coverage_20": -0.0021971354028210042,
|
|
"rewards/frontier_coverage_25": -0.0021971354028210042,
|
|
"rewards/frontier_coverage_5": -0.0021971354028210042,
|
|
"rewards/frontier_ece_reward": 0.02817784361541271,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18217773735523224,
|
|
"signal/accuracy_reward/group_std_mean": 0.23912697434425353,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.325,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09108886867761612,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09108886867761612,
|
|
"signal/advantage_abs_mean": 0.11837138533592224,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11837138533592224,
|
|
"signal/advantage_pre_scale_std": 0.1914423018693924,
|
|
"signal/advantage_std": 0.1914423018693924,
|
|
"signal/brier_reward/centered_abs_mean": 0.1487715631723404,
|
|
"signal/brier_reward/group_std_mean": 0.1938774347305298,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01859644539654255,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01859644539654255,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06237577944993973,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08584694117307663,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007796972431242466,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007796972431242466,
|
|
"signal/format_reward/centered_abs_mean": 0.0235948346555233,
|
|
"signal/format_reward/group_std_mean": 0.040967592224478724,
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01179741732776165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01179741732776165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002605421328917146,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004233243642374873,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6637040941277516e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6637040941277516e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09448704570531845,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13038320094347,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09448704570531845,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13038320094347,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09448704570531845,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13038320094347,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09448704570531845,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13038320094347,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09448704570531845,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13038320094347,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09448704570531845,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.13038320094347,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001691318047232926,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04688545688986778,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06142409965395927,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005860682111233473,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005860682111233473,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3345369982847062,
|
|
"calibration/batch_distribution_entropy": 0.7979723488465243,
|
|
"calibration/buffer_distribution_entropy": 0.6829944006341414,
|
|
"calibration/confidence_entropy": 0.4384303562223697,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.011548556430446194,
|
|
"calibration/coverage@15%": 0.2143167650918635,
|
|
"calibration/coverage@20%": 0.342257217847769,
|
|
"calibration/coverage@25%": 0.3931758530183727,
|
|
"calibration/coverage@30%": 0.43368983957219254,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.19744679022940095,
|
|
"calibration/mean_confidence": 0.7565729020638163,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01796875,
|
|
"completions/max_length": 3591.8,
|
|
"completions/max_terminated_length": 3591.8,
|
|
"completions/mean_length": 652.4442749023438,
|
|
"completions/mean_terminated_length": 664.4763916015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 170.8,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.0004398068122100085,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0164,
|
|
"num_tokens": 114922820.0,
|
|
"reward": 1.0199653863906861,
|
|
"reward_std": 0.1711801379919052,
|
|
"rewards/accuracy_reward": 0.6309895873069763,
|
|
"rewards/brier_reward": 0.7476680040359497,
|
|
"rewards/confidence_uniqueness_reward": 0.9258608222007751,
|
|
"rewards/format_reward": 0.9817708253860473,
|
|
"rewards/frontier_aurc_reward": -0.002807429013773799,
|
|
"rewards/frontier_coverage_1": 0.004628715617582202,
|
|
"rewards/frontier_coverage_10": 0.004628715617582202,
|
|
"rewards/frontier_coverage_15": 0.004628715617582202,
|
|
"rewards/frontier_coverage_20": 0.004628715617582202,
|
|
"rewards/frontier_coverage_25": 0.004628715617582202,
|
|
"rewards/frontier_coverage_5": 0.004628715617582202,
|
|
"rewards/frontier_ece_reward": 0.03157777301967144,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1884711354970932,
|
|
"signal/accuracy_reward/group_std_mean": 0.2461162716150284,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31388889253139496,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0942355677485466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0942355677485466,
|
|
"signal/advantage_abs_mean": 0.12647135853767394,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12647135853767394,
|
|
"signal/advantage_pre_scale_std": 0.2025492161512375,
|
|
"signal/advantage_std": 0.2025492161512375,
|
|
"signal/brier_reward/centered_abs_mean": 0.16874447762966155,
|
|
"signal/brier_reward/group_std_mean": 0.22043513357639313,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021093059703707694,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021093059703707694,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05148363336920738,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08058208972215652,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006435454171150923,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006435454171150923,
|
|
"signal/format_reward/centered_abs_mean": 0.03021918386220932,
|
|
"signal/format_reward/group_std_mean": 0.05589370355010033,
|
|
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01510959193110466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01510959193110466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003583089355379343,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005511940456926823,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.413729715859517e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.413729715859517e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09764359593391418,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1448903352022171,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09764359593391418,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1448903352022171,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09764359593391418,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1448903352022171,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09764359593391418,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1448903352022171,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09764359593391418,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1448903352022171,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09764359593391418,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1448903352022171,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017478203400969506,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05283080860972404,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06684889793395996,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006603851076215505,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006603851076215505,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25178061436936255,
|
|
"calibration/batch_distribution_entropy": 0.782224239763399,
|
|
"calibration/buffer_distribution_entropy": 0.6979801492843933,
|
|
"calibration/confidence_entropy": 0.38540589096164324,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.025,
|
|
"calibration/coverage@15%": 0.326465196377979,
|
|
"calibration/coverage@20%": 0.6091465609610507,
|
|
"calibration/coverage@25%": 0.6931875417068663,
|
|
"calibration/coverage@30%": 0.7474254742547426,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.16355479654311317,
|
|
"calibration/mean_confidence": 0.7571751739178787,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017708333333333326,
|
|
"completions/max_length": 3540.0,
|
|
"completions/max_terminated_length": 3540.0,
|
|
"completions/mean_length": 614.4053955078125,
|
|
"completions/mean_terminated_length": 625.5040283203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 138.0,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.0004897533799521625,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0152,
|
|
"num_tokens": 125094818.0,
|
|
"reward": 1.0328210592269897,
|
|
"reward_std": 0.16226947605609893,
|
|
"rewards/accuracy_reward": 0.6543402791023254,
|
|
"rewards/brier_reward": 0.7571277022361755,
|
|
"rewards/confidence_uniqueness_reward": 0.9207069873809814,
|
|
"rewards/format_reward": 0.9821180462837219,
|
|
"rewards/frontier_aurc_reward": -0.0027203528210520745,
|
|
"rewards/frontier_coverage_1": 0.0034012388437986373,
|
|
"rewards/frontier_coverage_10": 0.0034012388437986373,
|
|
"rewards/frontier_coverage_15": 0.0034012388437986373,
|
|
"rewards/frontier_coverage_20": 0.0034012388437986373,
|
|
"rewards/frontier_coverage_25": 0.0034012388437986373,
|
|
"rewards/frontier_coverage_5": 0.0034012388437986373,
|
|
"rewards/frontier_ece_reward": 0.03636742420494556,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16812065839767457,
|
|
"signal/accuracy_reward/group_std_mean": 0.23046530783176422,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31111111044883727,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08406032919883728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08406032919883728,
|
|
"signal/advantage_abs_mean": 0.11638958901166915,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11638958901166915,
|
|
"signal/advantage_pre_scale_std": 0.19259226322174072,
|
|
"signal/advantage_std": 0.19259226322174072,
|
|
"signal/brier_reward/centered_abs_mean": 0.1747972458600998,
|
|
"signal/brier_reward/group_std_mean": 0.2305249333381653,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021849655732512473,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021849655732512473,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.056257478147745135,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08205792903900147,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007032184768468142,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007032184768468142,
|
|
"signal/format_reward/centered_abs_mean": 0.02969835065305233,
|
|
"signal/format_reward/group_std_mean": 0.05096030831336975,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014849175326526165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.014849175326526165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032653619535267355,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004880654439330101,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.844997795065865e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.844997795065865e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1121538296341896,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16928686797618867,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1121538296341896,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16928686797618867,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1121538296341896,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16928686797618867,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1121538296341896,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16928686797618867,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1121538296341896,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16928686797618867,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1121538296341896,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16928686797618867,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002007553423754871,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05328927487134934,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06582499742507934,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006661159358918667,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006661159358918667,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2951920429356995,
|
|
"calibration/batch_distribution_entropy": 0.6789171581871921,
|
|
"calibration/buffer_distribution_entropy": 0.7060502188884911,
|
|
"calibration/confidence_entropy": 0.3652801822043075,
|
|
"calibration/coverage@0%": 0.016976127320954906,
|
|
"calibration/coverage@1%": 0.016976127320954906,
|
|
"calibration/coverage@10%": 0.11777188328912466,
|
|
"calibration/coverage@15%": 0.13580901856763924,
|
|
"calibration/coverage@20%": 0.15119363395225466,
|
|
"calibration/coverage@25%": 0.2842619485873057,
|
|
"calibration/coverage@30%": 0.5154877819661559,
|
|
"calibration/coverage@5%": 0.070026525198939,
|
|
"calibration/ece": 0.19455041095714468,
|
|
"calibration/mean_confidence": 0.806789845714594,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020138888888888908,
|
|
"completions/max_length": 3470.6,
|
|
"completions/max_terminated_length": 3470.6,
|
|
"completions/mean_length": 599.9177124023438,
|
|
"completions/mean_terminated_length": 612.2438720703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 124.2,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.00047923889360390604,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.0193,
|
|
"num_tokens": 135084014.0,
|
|
"reward": 1.017272448539734,
|
|
"reward_std": 0.16175627410411836,
|
|
"rewards/accuracy_reward": 0.6318576335906982,
|
|
"rewards/brier_reward": 0.7436864018440247,
|
|
"rewards/confidence_uniqueness_reward": 0.9008719086647033,
|
|
"rewards/format_reward": 0.9797742962837219,
|
|
"rewards/frontier_aurc_reward": -0.0031225522980093956,
|
|
"rewards/frontier_coverage_1": 0.014106421242468059,
|
|
"rewards/frontier_coverage_10": 0.014106421242468059,
|
|
"rewards/frontier_coverage_15": 0.014106421242468059,
|
|
"rewards/frontier_coverage_20": 0.014106421242468059,
|
|
"rewards/frontier_coverage_25": 0.014106421242468059,
|
|
"rewards/frontier_coverage_5": 0.014106421242468059,
|
|
"rewards/frontier_ece_reward": 0.03542088866233826,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1743109792470932,
|
|
"signal/accuracy_reward/group_std_mean": 0.22547804117202758,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0871554896235466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0871554896235466,
|
|
"signal/advantage_abs_mean": 0.12052754461765289,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12052754461765289,
|
|
"signal/advantage_pre_scale_std": 0.20319488048553466,
|
|
"signal/advantage_std": 0.20319488048553466,
|
|
"signal/brier_reward/centered_abs_mean": 0.1648993283510208,
|
|
"signal/brier_reward/group_std_mean": 0.2110469877719879,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0206124160438776,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0206124160438776,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06894725412130356,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.097543103992939,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008618406765162945,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008618406765162945,
|
|
"signal/format_reward/centered_abs_mean": 0.03365342915058136,
|
|
"signal/format_reward/group_std_mean": 0.05752314925193787,
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01682671457529068,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01682671457529068,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033782635815441608,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004819054994732142,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.047091592336074e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.047091592336074e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07730035781860352,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11400771141052246,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07730035781860352,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11400771141052246,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07730035781860352,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11400771141052246,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07730035781860352,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11400771141052246,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07730035781860352,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11400771141052246,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07730035781860352,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11400771141052246,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013836764032021166,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0494035005569458,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.060732795298099516,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006175437569618225,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006175437569618225,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23588326946851917,
|
|
"calibration/batch_distribution_entropy": 0.7336437964325994,
|
|
"calibration/buffer_distribution_entropy": 0.7058757308269206,
|
|
"calibration/confidence_entropy": 0.4050379491719296,
|
|
"calibration/coverage@0%": 0.0026345104155062935,
|
|
"calibration/coverage@1%": 0.0026345104155062935,
|
|
"calibration/coverage@10%": 0.15062862394804186,
|
|
"calibration/coverage@15%": 0.24837791370120765,
|
|
"calibration/coverage@20%": 0.5237080040614946,
|
|
"calibration/coverage@25%": 0.6888860829670989,
|
|
"calibration/coverage@30%": 0.7517615176151762,
|
|
"calibration/coverage@5%": 0.0026345104155062935,
|
|
"calibration/ece": 0.14133860252060226,
|
|
"calibration/mean_confidence": 0.7841849798052773,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015538194444444441,
|
|
"completions/max_length": 2772.4,
|
|
"completions/max_terminated_length": 2772.4,
|
|
"completions/mean_length": 612.7648559570313,
|
|
"completions/mean_terminated_length": 622.4307373046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 174.4,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.0004477369075175375,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0129,
|
|
"num_tokens": 145207961.0,
|
|
"reward": 1.0510570764541627,
|
|
"reward_std": 0.15537019968032836,
|
|
"rewards/accuracy_reward": 0.6852430582046509,
|
|
"rewards/brier_reward": 0.7796695828437805,
|
|
"rewards/confidence_uniqueness_reward": 0.9134960055351258,
|
|
"rewards/format_reward": 0.9844618082046509,
|
|
"rewards/frontier_aurc_reward": -0.002246162900701165,
|
|
"rewards/frontier_coverage_1": 0.0013872329844161868,
|
|
"rewards/frontier_coverage_10": 0.0013872329844161868,
|
|
"rewards/frontier_coverage_15": 0.0013872329844161868,
|
|
"rewards/frontier_coverage_20": 0.0013872329844161868,
|
|
"rewards/frontier_coverage_25": 0.0013872329844161868,
|
|
"rewards/frontier_coverage_5": 0.0013872329844161868,
|
|
"rewards/frontier_ece_reward": 0.03560146205127239,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17493489682674407,
|
|
"signal/accuracy_reward/group_std_mean": 0.2285703092813492,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08746744841337203,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08746744841337203,
|
|
"signal/advantage_abs_mean": 0.11331436336040497,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11331436336040497,
|
|
"signal/advantage_pre_scale_std": 0.19401153922080994,
|
|
"signal/advantage_std": 0.19401153922080994,
|
|
"signal/brier_reward/centered_abs_mean": 0.1502958595752716,
|
|
"signal/brier_reward/group_std_mean": 0.19655809700489044,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01878698244690895,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01878698244690895,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05595709830522537,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08529313653707504,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006994637288153171,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006994637288153171,
|
|
"signal/format_reward/centered_abs_mean": 0.02707790769636631,
|
|
"signal/format_reward/group_std_mean": 0.05181853100657463,
|
|
"signal/format_reward/group_zero_std_frac": 0.7861111402511597,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013538953848183155,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013538953848183155,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002420555287972093,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003638601349666715,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.332793687353842e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.332793687353842e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08835429251194,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.12635914981365204,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08835429251194,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.12635914981365204,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08835429251194,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12635914981365204,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08835429251194,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12635914981365204,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08835429251194,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12635914981365204,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08835429251194,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.12635914981365204,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015815417747944594,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.042579871416091916,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05289793238043785,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0053224839270114895,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0053224839270114895,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2283627656944609,
|
|
"calibration/batch_distribution_entropy": 0.8158849488071735,
|
|
"calibration/buffer_distribution_entropy": 0.7136887505869486,
|
|
"calibration/confidence_entropy": 0.4471504385566205,
|
|
"calibration/coverage@0%": 0.020931500872600352,
|
|
"calibration/coverage@1%": 0.020931500872600352,
|
|
"calibration/coverage@10%": 0.27006980802792324,
|
|
"calibration/coverage@15%": 0.3453125,
|
|
"calibration/coverage@20%": 0.3697916666666667,
|
|
"calibration/coverage@25%": 0.6741954607046071,
|
|
"calibration/coverage@30%": 0.7745257452574525,
|
|
"calibration/coverage@5%": 0.09882744328097731,
|
|
"calibration/ece": 0.14655096164643444,
|
|
"calibration/mean_confidence": 0.7371357132363405,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017100694444444443,
|
|
"completions/max_length": 3731.6,
|
|
"completions/max_terminated_length": 3731.6,
|
|
"completions/mean_length": 658.69453125,
|
|
"completions/mean_terminated_length": 670.2767578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 141.0,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.0004507862322498113,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0163,
|
|
"num_tokens": 155849402.0,
|
|
"reward": 1.0312724709510803,
|
|
"reward_std": 0.1551019161939621,
|
|
"rewards/accuracy_reward": 0.6509548664093018,
|
|
"rewards/brier_reward": 0.7650970101356507,
|
|
"rewards/confidence_uniqueness_reward": 0.9270597219467163,
|
|
"rewards/format_reward": 0.982812511920929,
|
|
"rewards/frontier_aurc_reward": -0.0021514812484383582,
|
|
"rewards/frontier_coverage_1": 0.0006764297373592854,
|
|
"rewards/frontier_coverage_10": 0.0006764297373592854,
|
|
"rewards/frontier_coverage_15": 0.0006764297373592854,
|
|
"rewards/frontier_coverage_20": 0.0006764297373592854,
|
|
"rewards/frontier_coverage_25": 0.0006764297373592854,
|
|
"rewards/frontier_coverage_5": 0.0006764297373592854,
|
|
"rewards/frontier_ece_reward": 0.022680159099400042,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18104926347732545,
|
|
"signal/accuracy_reward/group_std_mean": 0.23661141991615295,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.33611112236976626,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09052463173866272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09052463173866272,
|
|
"signal/advantage_abs_mean": 0.11439994722604752,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11439994722604752,
|
|
"signal/advantage_pre_scale_std": 0.1880962520837784,
|
|
"signal/advantage_std": 0.1880962520837784,
|
|
"signal/brier_reward/centered_abs_mean": 0.15392581820487977,
|
|
"signal/brier_reward/group_std_mean": 0.19755606949329377,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01924072727560997,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01924072727560997,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045452053844928744,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.070206418633461,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005681506730616093,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005681506730616093,
|
|
"signal/format_reward/centered_abs_mean": 0.027105035632848738,
|
|
"signal/format_reward/group_std_mean": 0.04782758429646492,
|
|
"signal/format_reward/group_zero_std_frac": 0.8166666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013552517816424369,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013552517816424369,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020432798657566307,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003136290283873677,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.657470879261382e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.657470879261382e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12025301158428192,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16328471302986144,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12025301158428192,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16328471302986144,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12025301158428192,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16328471302986144,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12025301158428192,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16328471302986144,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12025301158428192,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16328471302986144,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12025301158428192,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16328471302986144,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021525288466364145,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.034540054574608806,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.044214902073144914,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004317506821826101,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004317506821826101,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19863731386705097,
|
|
"calibration/batch_distribution_entropy": 0.8784851844174906,
|
|
"calibration/buffer_distribution_entropy": 0.7286079857168308,
|
|
"calibration/confidence_entropy": 0.47194029223098666,
|
|
"calibration/coverage@0%": 0.006957876563311484,
|
|
"calibration/coverage@1%": 0.006957876563311484,
|
|
"calibration/coverage@10%": 0.21343171212142478,
|
|
"calibration/coverage@15%": 0.31718648568982216,
|
|
"calibration/coverage@20%": 0.5108775185596135,
|
|
"calibration/coverage@25%": 0.8152747734527581,
|
|
"calibration/coverage@30%": 0.9188776441194223,
|
|
"calibration/coverage@5%": 0.06903902044177439,
|
|
"calibration/ece": 0.11392621986175389,
|
|
"calibration/mean_confidence": 0.685149595819355,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017447916666666674,
|
|
"completions/max_length": 3531.0,
|
|
"completions/max_terminated_length": 3531.0,
|
|
"completions/mean_length": 672.5692749023438,
|
|
"completions/mean_terminated_length": 684.5512817382812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 181.8,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.00040943012572824955,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.0158,
|
|
"num_tokens": 166684600.0,
|
|
"reward": 1.0490335464477538,
|
|
"reward_std": 0.15231254696846008,
|
|
"rewards/accuracy_reward": 0.6825520753860473,
|
|
"rewards/brier_reward": 0.7915989398956299,
|
|
"rewards/confidence_uniqueness_reward": 0.9125476956367493,
|
|
"rewards/format_reward": 0.9823784708976746,
|
|
"rewards/frontier_aurc_reward": -0.0017510119127109648,
|
|
"rewards/frontier_coverage_1": 0.0037823686841875316,
|
|
"rewards/frontier_coverage_10": 0.0037823686841875316,
|
|
"rewards/frontier_coverage_15": 0.0037823686841875316,
|
|
"rewards/frontier_coverage_20": 0.0037823686841875316,
|
|
"rewards/frontier_coverage_25": 0.0037823686841875316,
|
|
"rewards/frontier_coverage_5": 0.0037823686841875316,
|
|
"rewards/frontier_ece_reward": 0.025400371849536897,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1764702707529068,
|
|
"signal/accuracy_reward/group_std_mean": 0.22980018258094786,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35555556416511536,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0882351353764534,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0882351353764534,
|
|
"signal/advantage_abs_mean": 0.10985312461853028,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10985312461853028,
|
|
"signal/advantage_pre_scale_std": 0.19002984464168549,
|
|
"signal/advantage_std": 0.19002984464168549,
|
|
"signal/brier_reward/centered_abs_mean": 0.14407358169555665,
|
|
"signal/brier_reward/group_std_mean": 0.18776251673698424,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01800919771194458,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01800919771194458,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.051011984795331956,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08202408254146576,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063764980994164945,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063764980994164945,
|
|
"signal/format_reward/centered_abs_mean": 0.03092990405857563,
|
|
"signal/format_reward/group_std_mean": 0.05930749401450157,
|
|
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015464952029287816,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015464952029287816,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017428799066692591,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002763870591297746,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.119755092484411e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.119755092484411e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11682608127593994,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1566822350025177,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11682608127593994,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1566822350025177,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11682608127593994,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1566822350025177,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11682608127593994,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1566822350025177,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11682608127593994,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1566822350025177,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11682608127593994,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1566822350025177,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020911867963150144,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03192468658089638,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04087934568524361,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0039905858226120475,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0039905858226120475,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1621043276555874,
|
|
"calibration/batch_distribution_entropy": 0.7713018291348717,
|
|
"calibration/buffer_distribution_entropy": 0.7391826748921617,
|
|
"calibration/confidence_entropy": 0.3906538953529201,
|
|
"calibration/coverage@0%": 0.01748775430290532,
|
|
"calibration/coverage@1%": 0.01748775430290532,
|
|
"calibration/coverage@10%": 0.29865454137224134,
|
|
"calibration/coverage@15%": 0.4961013783380685,
|
|
"calibration/coverage@20%": 0.7170695539701011,
|
|
"calibration/coverage@25%": 0.8760416319073021,
|
|
"calibration/coverage@30%": 0.9350359579568505,
|
|
"calibration/coverage@5%": 0.12930449114246848,
|
|
"calibration/ece": 0.10879461725676424,
|
|
"calibration/mean_confidence": 0.7703561898492359,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018055555555555537,
|
|
"completions/max_length": 3415.8,
|
|
"completions/max_terminated_length": 3415.8,
|
|
"completions/mean_length": 657.1771728515625,
|
|
"completions/mean_terminated_length": 669.2972290039063,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 158.0,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.00045981750008650124,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.0165,
|
|
"num_tokens": 177323953.0,
|
|
"reward": 1.048110580444336,
|
|
"reward_std": 0.1468990057706833,
|
|
"rewards/accuracy_reward": 0.6861111164093018,
|
|
"rewards/brier_reward": 0.786482310295105,
|
|
"rewards/confidence_uniqueness_reward": 0.8900201439857482,
|
|
"rewards/format_reward": 0.9819444417953491,
|
|
"rewards/frontier_aurc_reward": -0.002023177081719041,
|
|
"rewards/frontier_coverage_1": 0.006752363312989474,
|
|
"rewards/frontier_coverage_10": 0.006752363312989474,
|
|
"rewards/frontier_coverage_15": 0.006752363312989474,
|
|
"rewards/frontier_coverage_20": 0.006752363312989474,
|
|
"rewards/frontier_coverage_25": 0.006752363312989474,
|
|
"rewards/frontier_coverage_5": 0.006752363312989474,
|
|
"rewards/frontier_ece_reward": 0.03064808137714863,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16185981035232544,
|
|
"signal/accuracy_reward/group_std_mean": 0.21465785503387452,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08092990517616272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08092990517616272,
|
|
"signal/advantage_abs_mean": 0.10632715672254563,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10632715672254563,
|
|
"signal/advantage_pre_scale_std": 0.18754963874816893,
|
|
"signal/advantage_std": 0.18754963874816893,
|
|
"signal/brier_reward/centered_abs_mean": 0.14517129957675934,
|
|
"signal/brier_reward/group_std_mean": 0.1876837819814682,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018146412447094917,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018146412447094917,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0636248804628849,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0918489396572113,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007953110057860613,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007953110057860613,
|
|
"signal/format_reward/centered_abs_mean": 0.03050130233168602,
|
|
"signal/format_reward/group_std_mean": 0.054274033010005954,
|
|
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01525065116584301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01525065116584301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024305079132318497,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003738354705274105,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3506090150913224e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3506090150913224e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09725135415792466,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1383568376302719,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09725135415792466,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1383568376302719,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09725135415792466,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1383568376302719,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09725135415792466,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1383568376302719,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09725135415792466,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1383568376302719,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09725135415792466,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1383568376302719,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017407992389053106,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03363135680556297,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04208812639117241,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0042039196006953715,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0042039196006953715,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22302830877145974,
|
|
"calibration/batch_distribution_entropy": 0.7654984420303647,
|
|
"calibration/buffer_distribution_entropy": 0.7426643403332136,
|
|
"calibration/confidence_entropy": 0.37485479191168325,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.32827225130890053,
|
|
"calibration/coverage@15%": 0.44633415268512344,
|
|
"calibration/coverage@20%": 0.5211039690113962,
|
|
"calibration/coverage@25%": 0.5988238926626621,
|
|
"calibration/coverage@30%": 0.70975130522652,
|
|
"calibration/coverage@5%": 0.10628272251308901,
|
|
"calibration/ece": 0.17951479772284673,
|
|
"calibration/mean_confidence": 0.7752378849995252,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010850694444444465,
|
|
"completions/max_length": 3244.6,
|
|
"completions/max_terminated_length": 3244.6,
|
|
"completions/mean_length": 665.167529296875,
|
|
"completions/mean_terminated_length": 672.4716430664063,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 154.6,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 0.00041399727342650294,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0103,
|
|
"num_tokens": 188078363.0,
|
|
"reward": 1.0430236101150512,
|
|
"reward_std": 0.1384707883000374,
|
|
"rewards/accuracy_reward": 0.6696180582046509,
|
|
"rewards/brier_reward": 0.7802430033683777,
|
|
"rewards/confidence_uniqueness_reward": 0.885105288028717,
|
|
"rewards/format_reward": 0.989062488079071,
|
|
"rewards/frontier_aurc_reward": -0.002526196092367172,
|
|
"rewards/frontier_coverage_1": 0.017078271601349115,
|
|
"rewards/frontier_coverage_10": 0.017078271601349115,
|
|
"rewards/frontier_coverage_15": 0.017078271601349115,
|
|
"rewards/frontier_coverage_20": 0.017078271601349115,
|
|
"rewards/frontier_coverage_25": 0.017078271601349115,
|
|
"rewards/frontier_coverage_5": 0.017078271601349115,
|
|
"rewards/frontier_ece_reward": 0.029806675761938094,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15270182192325593,
|
|
"signal/accuracy_reward/group_std_mean": 0.2024629831314087,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07635091096162797,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07635091096162797,
|
|
"signal/advantage_abs_mean": 0.09986221194267272,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09986221194267272,
|
|
"signal/advantage_pre_scale_std": 0.17722425758838653,
|
|
"signal/advantage_std": 0.17722425758838653,
|
|
"signal/brier_reward/centered_abs_mean": 0.14648381173610686,
|
|
"signal/brier_reward/group_std_mean": 0.19169094264507294,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018310476467013358,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018310476467013358,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06063394397497177,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08702098578214645,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007579242996871472,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007579242996871472,
|
|
"signal/format_reward/centered_abs_mean": 0.019715712033212186,
|
|
"signal/format_reward/group_std_mean": 0.039822696894407275,
|
|
"signal/format_reward/group_zero_std_frac": 0.8305555701255798,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009857856016606093,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009857856016606093,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003348661307245493,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005126806069165468,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.994103412376717e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.994103412376717e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.08807848840951919,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.126033778488636,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.08807848840951919,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.126033778488636,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08807848840951919,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.126033778488636,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08807848840951919,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.126033778488636,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08807848840951919,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.126033778488636,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.08807848840951919,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.126033778488636,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00157660492695868,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03316693603992462,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04110720306634903,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041458670049905775,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041458670049905775,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19005778525037625,
|
|
"calibration/batch_distribution_entropy": 0.7905120036990934,
|
|
"calibration/buffer_distribution_entropy": 0.7454486867596737,
|
|
"calibration/confidence_entropy": 0.41916534905811276,
|
|
"calibration/coverage@0%": 0.01983628239499553,
|
|
"calibration/coverage@1%": 0.01983628239499553,
|
|
"calibration/coverage@10%": 0.12477490240346174,
|
|
"calibration/coverage@15%": 0.3196656977564555,
|
|
"calibration/coverage@20%": 0.7049851243582956,
|
|
"calibration/coverage@25%": 0.9022022628372499,
|
|
"calibration/coverage@30%": 0.9375679721496954,
|
|
"calibration/coverage@5%": 0.049326899016979446,
|
|
"calibration/ece": 0.09743477958458721,
|
|
"calibration/mean_confidence": 0.7563954787317189,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015364583333333303,
|
|
"completions/max_length": 3783.6,
|
|
"completions/max_terminated_length": 3783.6,
|
|
"completions/mean_length": 696.7359375,
|
|
"completions/mean_terminated_length": 707.5933471679688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 184.8,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.00038863401277922094,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0138,
|
|
"num_tokens": 199203833.0,
|
|
"reward": 1.0556352138519287,
|
|
"reward_std": 0.1429404079914093,
|
|
"rewards/accuracy_reward": 0.6901041626930237,
|
|
"rewards/brier_reward": 0.8032171607017518,
|
|
"rewards/confidence_uniqueness_reward": 0.8976126194000245,
|
|
"rewards/format_reward": 0.9845486164093018,
|
|
"rewards/frontier_aurc_reward": -0.0018692356767132877,
|
|
"rewards/frontier_coverage_1": 0.02305122137913713,
|
|
"rewards/frontier_coverage_10": 0.02305122137913713,
|
|
"rewards/frontier_coverage_15": 0.02305122137913713,
|
|
"rewards/frontier_coverage_20": 0.02305122137913713,
|
|
"rewards/frontier_coverage_25": 0.02305122137913713,
|
|
"rewards/frontier_coverage_5": 0.02305122137913713,
|
|
"rewards/frontier_ece_reward": 0.02610306181013584,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16201171576976775,
|
|
"signal/accuracy_reward/group_std_mean": 0.21425627470016478,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08100585788488388,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08100585788488388,
|
|
"signal/advantage_abs_mean": 0.10346845239400863,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10346845239400863,
|
|
"signal/advantage_pre_scale_std": 0.18151322603225709,
|
|
"signal/advantage_std": 0.18151322603225709,
|
|
"signal/brier_reward/centered_abs_mean": 0.13857089430093766,
|
|
"signal/brier_reward/group_std_mean": 0.18216053247451783,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017321361787617208,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017321361787617208,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.050343307107686995,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07695924490690231,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006292913388460874,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006292913388460874,
|
|
"signal/format_reward/centered_abs_mean": 0.02498914934694767,
|
|
"signal/format_reward/group_std_mean": 0.04789231047034263,
|
|
"signal/format_reward/group_zero_std_frac": 0.8,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012494574673473835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012494574673473835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00248065204359591,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0040627093985676765,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.440367265488021e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.440367265488021e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09468016475439071,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13442795127630233,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09468016475439071,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13442795127630233,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09468016475439071,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13442795127630233,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09468016475439071,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13442795127630233,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09468016475439071,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13442795127630233,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09468016475439071,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.13442795127630233,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016947749769315123,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.028183171153068544,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.035510845482349396,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003522896394133568,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003522896394133568,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 0.15891660441180894,
|
|
"eval_calibration/batch_distribution_entropy": 0.6959708379422341,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7499945272337074,
|
|
"eval_calibration/confidence_entropy": 0.414659227708472,
|
|
"eval_calibration/coverage@0%": 0.19452284946236562,
|
|
"eval_calibration/coverage@1%": 0.19452284946236562,
|
|
"eval_calibration/coverage@10%": 0.3979166666666667,
|
|
"eval_calibration/coverage@15%": 0.6486335125448028,
|
|
"eval_calibration/coverage@20%": 0.7757056451612904,
|
|
"eval_calibration/coverage@25%": 0.8178763440860215,
|
|
"eval_calibration/coverage@30%": 0.9625336021505376,
|
|
"eval_calibration/coverage@5%": 0.24828629032258065,
|
|
"eval_calibration/ece": 0.1466347668793299,
|
|
"eval_calibration/mean_confidence": 0.7744951179912419,
|
|
"eval_completions/clipped_ratio": 0.012152777777777771,
|
|
"eval_completions/max_length": 2663.8333333333335,
|
|
"eval_completions/max_terminated_length": 2663.8333333333335,
|
|
"eval_completions/mean_length": 688.960947672526,
|
|
"eval_completions/mean_terminated_length": 697.5240783691406,
|
|
"eval_completions/min_length": 46.333333333333336,
|
|
"eval_completions/min_terminated_length": 219.66666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 199203833.0,
|
|
"eval_reward": 1.0394453605016072,
|
|
"eval_reward_std": 0.2735634073615074,
|
|
"eval_rewards/accuracy_reward": 0.6710069477558136,
|
|
"eval_rewards/brier_reward": 0.7972110112508138,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8433377345403036,
|
|
"eval_rewards/format_reward": 0.9869791666666666,
|
|
"eval_rewards/frontier_aurc_reward": -0.002003069695395728,
|
|
"eval_rewards/frontier_coverage_1": 0.026348761282861233,
|
|
"eval_rewards/frontier_coverage_10": 0.026348761282861233,
|
|
"eval_rewards/frontier_coverage_15": 0.026348761282861233,
|
|
"eval_rewards/frontier_coverage_20": 0.026348761282861233,
|
|
"eval_rewards/frontier_coverage_25": 0.026348761282861233,
|
|
"eval_rewards/frontier_coverage_5": 0.026348761282861233,
|
|
"eval_rewards/frontier_ece_reward": 0.0207175404454271,
|
|
"eval_runtime": 205.6415,
|
|
"eval_samples_per_second": 4.863,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4276801198720932,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4686971952517827,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2138400599360466,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2138400599360466,
|
|
"eval_signal/advantage_abs_mean": 0.237903726597627,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.237903726597627,
|
|
"eval_signal/advantage_pre_scale_std": 0.2721952473123868,
|
|
"eval_signal/advantage_std": 0.2721952473123868,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22816414137681326,
|
|
"eval_signal/brier_reward/group_std_mean": 0.28985429803530377,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028520517672101658,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.028520517672101658,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07482141132156055,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11087949698170026,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009352676415195068,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009352676415195068,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.024793836598594982,
|
|
"eval_signal/format_reward/group_std_mean": 0.06416239465276401,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012396918299297491,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012396918299297491,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030194248538464308,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005542080150917172,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4047704907134175e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4047704907134175e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1204829066991806,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.20749556769927344,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.1204829066991806,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.20749556769927344,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.1204829066991806,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.20749556769927344,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1204829066991806,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.20749556769927344,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1204829066991806,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.20749556769927344,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1204829066991806,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.20749556769927344,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002156644050652782,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03396274273594221,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.04516912375887235,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004245342841992776,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004245342841992776,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3174403591072565,
|
|
"calibration/batch_distribution_entropy": 0.7832574028492738,
|
|
"calibration/buffer_distribution_entropy": 0.7522402415093571,
|
|
"calibration/confidence_entropy": 0.4539565113764869,
|
|
"calibration/coverage@0%": 0.015250790160780538,
|
|
"calibration/coverage@1%": 0.015250790160780538,
|
|
"calibration/coverage@10%": 0.14032004223834288,
|
|
"calibration/coverage@15%": 0.15768846329097447,
|
|
"calibration/coverage@20%": 0.25616122896219523,
|
|
"calibration/coverage@25%": 0.32953986243608197,
|
|
"calibration/coverage@30%": 0.3985706226539276,
|
|
"calibration/coverage@5%": 0.08104026384499106,
|
|
"calibration/ece": 0.17285030321480427,
|
|
"calibration/mean_confidence": 0.7526070795683661,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015104166666666651,
|
|
"completions/max_length": 3606.8,
|
|
"completions/max_terminated_length": 3606.8,
|
|
"completions/mean_length": 710.5170166015625,
|
|
"completions/mean_terminated_length": 721.425048828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 164.6,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 0.0004076336626894772,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.0122,
|
|
"num_tokens": 210465853.0,
|
|
"reward": 1.0506530284881592,
|
|
"reward_std": 0.13989392966032027,
|
|
"rewards/accuracy_reward": 0.68125,
|
|
"rewards/brier_reward": 0.8004813671112061,
|
|
"rewards/confidence_uniqueness_reward": 0.9040295124053955,
|
|
"rewards/format_reward": 0.9848958373069763,
|
|
"rewards/frontier_aurc_reward": -0.0015720528550446033,
|
|
"rewards/frontier_coverage_1": 0.018485220894217492,
|
|
"rewards/frontier_coverage_10": 0.018485220894217492,
|
|
"rewards/frontier_coverage_15": 0.018485220894217492,
|
|
"rewards/frontier_coverage_20": 0.018485220894217492,
|
|
"rewards/frontier_coverage_25": 0.018485220894217492,
|
|
"rewards/frontier_coverage_5": 0.018485220894217492,
|
|
"rewards/frontier_ece_reward": 0.0204722385853529,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15923394113779069,
|
|
"signal/accuracy_reward/group_std_mean": 0.21463679075241088,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07961697056889534,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07961697056889534,
|
|
"signal/advantage_abs_mean": 0.09950301349163056,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09950301349163056,
|
|
"signal/advantage_pre_scale_std": 0.17661311626434326,
|
|
"signal/advantage_std": 0.17661311626434326,
|
|
"signal/brier_reward/centered_abs_mean": 0.13340435177087784,
|
|
"signal/brier_reward/group_std_mean": 0.17492244243621827,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01667554397135973,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01667554397135973,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047524832934141156,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07183988243341446,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059406041167676445,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059406041167676445,
|
|
"signal/format_reward/centered_abs_mean": 0.024891493655741215,
|
|
"signal/format_reward/group_std_mean": 0.04560527727007866,
|
|
"signal/format_reward/group_zero_std_frac": 0.8166666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012445746827870608,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012445746827870608,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016542579047381877,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002758215693756938,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9611215722979978e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9611215722979978e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09999113231897354,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.13899571299552918,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09999113231897354,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13899571299552918,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09999113231897354,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13899571299552918,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09999113231897354,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13899571299552918,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09999113231897354,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13899571299552918,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09999113231897354,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.13899571299552918,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017898412188515067,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02349744737148285,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.030407802015542985,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002937180921435356,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002937180921435356,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1980429971425905,
|
|
"calibration/batch_distribution_entropy": 0.8188233169740314,
|
|
"calibration/buffer_distribution_entropy": 0.7578019516298132,
|
|
"calibration/confidence_entropy": 0.4403369866103337,
|
|
"calibration/coverage@0%": 0.04398981835652027,
|
|
"calibration/coverage@1%": 0.04398981835652027,
|
|
"calibration/coverage@10%": 0.2766707394091519,
|
|
"calibration/coverage@15%": 0.3595005740521652,
|
|
"calibration/coverage@20%": 0.4683630213987621,
|
|
"calibration/coverage@25%": 0.6562042932608049,
|
|
"calibration/coverage@30%": 0.8698795999435456,
|
|
"calibration/coverage@5%": 0.15361152888283608,
|
|
"calibration/ece": 0.11835691165409723,
|
|
"calibration/mean_confidence": 0.7310259669183279,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013541666666666652,
|
|
"completions/max_length": 3426.8,
|
|
"completions/max_terminated_length": 3426.8,
|
|
"completions/mean_length": 731.7519897460937,
|
|
"completions/mean_terminated_length": 741.8162719726563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 207.4,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.0004139347583986819,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.0117,
|
|
"num_tokens": 222004084.0,
|
|
"reward": 1.0684332370758056,
|
|
"reward_std": 0.1274869754910469,
|
|
"rewards/accuracy_reward": 0.7131944537162781,
|
|
"rewards/brier_reward": 0.813305401802063,
|
|
"rewards/confidence_uniqueness_reward": 0.9080566763877869,
|
|
"rewards/format_reward": 0.9864583373069763,
|
|
"rewards/frontier_aurc_reward": -0.001346051273867488,
|
|
"rewards/frontier_coverage_1": 0.008798408973962068,
|
|
"rewards/frontier_coverage_10": 0.008798408973962068,
|
|
"rewards/frontier_coverage_15": 0.008798408973962068,
|
|
"rewards/frontier_coverage_20": 0.008798408973962068,
|
|
"rewards/frontier_coverage_25": 0.008798408973962068,
|
|
"rewards/frontier_coverage_5": 0.008798408973962068,
|
|
"rewards/frontier_ece_reward": 0.02012586295604706,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14842664897441865,
|
|
"signal/accuracy_reward/group_std_mean": 0.19679024815559387,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43888888955116273,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07421332448720933,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07421332448720933,
|
|
"signal/advantage_abs_mean": 0.09238868355751037,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09238868355751037,
|
|
"signal/advantage_pre_scale_std": 0.1690053313970566,
|
|
"signal/advantage_std": 0.1690053313970566,
|
|
"signal/brier_reward/centered_abs_mean": 0.12360798418521882,
|
|
"signal/brier_reward/group_std_mean": 0.16248373985290526,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015450998023152352,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015450998023152352,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04519175067543983,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06818027943372726,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005648968834429979,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005648968834429979,
|
|
"signal/format_reward/centered_abs_mean": 0.022352430410683154,
|
|
"signal/format_reward/group_std_mean": 0.04120796211063862,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333492279053,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011176215205341577,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011176215205341577,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013575590681284666,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022294150665402414,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4300306176883168e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4300306176883168e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10466690212488175,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1473758965730667,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10466690212488175,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1473758965730667,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10466690212488175,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1473758965730667,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10466690212488175,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1473758965730667,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10466690212488175,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1473758965730667,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10466690212488175,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1473758965730667,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018735374789685011,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.022222984954714774,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.028566232323646544,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027778731193393467,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027778731193393467,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2674647745371999,
|
|
"calibration/batch_distribution_entropy": 0.8655340385503951,
|
|
"calibration/buffer_distribution_entropy": 0.7650680762501356,
|
|
"calibration/confidence_entropy": 0.46264810399743606,
|
|
"calibration/coverage@0%": 0.0356302909735624,
|
|
"calibration/coverage@1%": 0.0356302909735624,
|
|
"calibration/coverage@10%": 0.11016870839079522,
|
|
"calibration/coverage@15%": 0.22985091263385246,
|
|
"calibration/coverage@20%": 0.4752261244438419,
|
|
"calibration/coverage@25%": 0.5995528351424358,
|
|
"calibration/coverage@30%": 0.66065353496751,
|
|
"calibration/coverage@5%": 0.059629899612001526,
|
|
"calibration/ece": 0.16247371201899063,
|
|
"calibration/mean_confidence": 0.6835954949209723,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01284722222222221,
|
|
"completions/max_length": 3594.4,
|
|
"completions/max_terminated_length": 3594.4,
|
|
"completions/mean_length": 743.1428955078125,
|
|
"completions/mean_terminated_length": 752.9280395507812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.4,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 0.0003546822990756482,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": -0.0119,
|
|
"num_tokens": 233644290.0,
|
|
"reward": 1.045598602294922,
|
|
"reward_std": 0.1324237823486328,
|
|
"rewards/accuracy_reward": 0.665538203716278,
|
|
"rewards/brier_reward": 0.7980833888053894,
|
|
"rewards/confidence_uniqueness_reward": 0.9122416257858277,
|
|
"rewards/format_reward": 0.9870659828186035,
|
|
"rewards/frontier_aurc_reward": -0.0016012408072128893,
|
|
"rewards/frontier_coverage_1": 0.03013449099380523,
|
|
"rewards/frontier_coverage_10": 0.03013449099380523,
|
|
"rewards/frontier_coverage_15": 0.03013449099380523,
|
|
"rewards/frontier_coverage_20": 0.03013449099380523,
|
|
"rewards/frontier_coverage_25": 0.03013449099380523,
|
|
"rewards/frontier_coverage_5": 0.03013449099380523,
|
|
"rewards/frontier_ece_reward": 0.018384577706456184,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16057400405406952,
|
|
"signal/accuracy_reward/group_std_mean": 0.20613610446453096,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08028700202703476,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08028700202703476,
|
|
"signal/advantage_abs_mean": 0.09785276204347611,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09785276204347611,
|
|
"signal/advantage_pre_scale_std": 0.17164961993694305,
|
|
"signal/advantage_std": 0.17164961993694305,
|
|
"signal/brier_reward/centered_abs_mean": 0.13958249241113663,
|
|
"signal/brier_reward/group_std_mean": 0.17782102823257445,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01744781155139208,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01744781155139208,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0446560338139534,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06761002168059349,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005582004226744175,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005582004226744175,
|
|
"signal/format_reward/centered_abs_mean": 0.021869575139135122,
|
|
"signal/format_reward/group_std_mean": 0.04040019139647484,
|
|
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010934787569567561,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010934787569567561,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001572295231744647,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002514668833464384,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8144082898506893e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8144082898506893e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12204509526491165,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16676346063613892,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12204509526491165,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16676346063613892,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12204509526491165,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16676346063613892,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12204509526491165,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16676346063613892,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12204509526491165,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16676346063613892,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12204509526491165,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16676346063613892,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002184607065282762,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.022244375944137574,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.028506366163492204,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027805469930171967,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027805469930171967,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2672984725852937,
|
|
"calibration/batch_distribution_entropy": 0.8792197933707804,
|
|
"calibration/buffer_distribution_entropy": 0.7740401221561556,
|
|
"calibration/confidence_entropy": 0.430121035681503,
|
|
"calibration/coverage@0%": 0.027200066755723284,
|
|
"calibration/coverage@1%": 0.027200066755723284,
|
|
"calibration/coverage@10%": 0.2044821067474349,
|
|
"calibration/coverage@15%": 0.42163062243374394,
|
|
"calibration/coverage@20%": 0.5291554748877976,
|
|
"calibration/coverage@25%": 0.5711437336692795,
|
|
"calibration/coverage@30%": 0.5952792553191489,
|
|
"calibration/coverage@5%": 0.13044019833467066,
|
|
"calibration/ece": 0.15999251951129834,
|
|
"calibration/mean_confidence": 0.6908585197289003,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013281250000000022,
|
|
"completions/max_length": 3213.6,
|
|
"completions/max_terminated_length": 3213.6,
|
|
"completions/mean_length": 733.0212890625,
|
|
"completions/mean_terminated_length": 742.8743530273438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 226.0,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 0.00029223994351923466,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.012,
|
|
"num_tokens": 245170551.0,
|
|
"reward": 1.0578627586364746,
|
|
"reward_std": 0.13152444064617158,
|
|
"rewards/accuracy_reward": 0.6881944417953492,
|
|
"rewards/brier_reward": 0.8041746258735657,
|
|
"rewards/confidence_uniqueness_reward": 0.9153994798660279,
|
|
"rewards/format_reward": 0.9867187380790711,
|
|
"rewards/frontier_aurc_reward": -0.0015686721657402814,
|
|
"rewards/frontier_coverage_1": 0.027476230938918888,
|
|
"rewards/frontier_coverage_10": 0.027476230938918888,
|
|
"rewards/frontier_coverage_15": 0.027476230938918888,
|
|
"rewards/frontier_coverage_20": 0.027476230938918888,
|
|
"rewards/frontier_coverage_25": 0.027476230938918888,
|
|
"rewards/frontier_coverage_5": 0.027476230938918888,
|
|
"rewards/frontier_ece_reward": 0.020292357727885246,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1541992172598839,
|
|
"signal/accuracy_reward/group_std_mean": 0.20872304141521453,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07709960862994195,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07709960862994195,
|
|
"signal/advantage_abs_mean": 0.0929091677069664,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0929091677069664,
|
|
"signal/advantage_pre_scale_std": 0.16713809072971345,
|
|
"signal/advantage_std": 0.16713809072971345,
|
|
"signal/brier_reward/centered_abs_mean": 0.14138388335704805,
|
|
"signal/brier_reward/group_std_mean": 0.183754500746727,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017672985419631006,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017672985419631006,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04437449499964714,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06984723061323166,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005546811874955893,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005546811874955893,
|
|
"signal/format_reward/centered_abs_mean": 0.02253146693110466,
|
|
"signal/format_reward/group_std_mean": 0.044410817325115204,
|
|
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01126573346555233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01126573346555233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001606982573866844,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025633119512349367,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.876498801924754e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.876498801924754e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13902547657489778,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18895367681980133,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13902547657489778,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18895367681980133,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13902547657489778,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18895367681980133,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13902547657489778,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18895367681980133,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13902547657489778,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18895367681980133,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13902547657489778,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18895367681980133,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024885560385882856,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.022856369987130164,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02880855239927769,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028570462483912705,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028570462483912705,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1551460457832317,
|
|
"calibration/batch_distribution_entropy": 0.7586701247838616,
|
|
"calibration/buffer_distribution_entropy": 0.7802119276107362,
|
|
"calibration/confidence_entropy": 0.36355490124193934,
|
|
"calibration/coverage@0%": 0.014926286768201092,
|
|
"calibration/coverage@1%": 0.014926286768201092,
|
|
"calibration/coverage@10%": 0.40697873647633714,
|
|
"calibration/coverage@15%": 0.5172635027475139,
|
|
"calibration/coverage@20%": 0.7615805455715663,
|
|
"calibration/coverage@25%": 0.8873517883711664,
|
|
"calibration/coverage@30%": 0.9523954341064982,
|
|
"calibration/coverage@5%": 0.1930317832221018,
|
|
"calibration/ece": 0.10565811367996707,
|
|
"calibration/mean_confidence": 0.7530231723896744,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014149305555555557,
|
|
"completions/max_length": 3340.4,
|
|
"completions/max_terminated_length": 3340.4,
|
|
"completions/mean_length": 733.7210815429687,
|
|
"completions/mean_terminated_length": 744.2840087890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 227.8,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 0.00039019520045258105,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0142,
|
|
"num_tokens": 256740682.0,
|
|
"reward": 1.0580396413803101,
|
|
"reward_std": 0.13579430282115937,
|
|
"rewards/accuracy_reward": 0.6907986164093017,
|
|
"rewards/brier_reward": 0.8093456387519836,
|
|
"rewards/confidence_uniqueness_reward": 0.8919880509376525,
|
|
"rewards/format_reward": 0.985850703716278,
|
|
"rewards/frontier_aurc_reward": -0.0017035908997058869,
|
|
"rewards/frontier_coverage_1": 0.03645942322909832,
|
|
"rewards/frontier_coverage_10": 0.03645942322909832,
|
|
"rewards/frontier_coverage_15": 0.03645942322909832,
|
|
"rewards/frontier_coverage_20": 0.03645942322909832,
|
|
"rewards/frontier_coverage_25": 0.03645942322909832,
|
|
"rewards/frontier_coverage_5": 0.03645942322909832,
|
|
"rewards/frontier_ece_reward": 0.025304096192121504,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15849609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.2060663789510727,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.079248046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.079248046875,
|
|
"signal/advantage_abs_mean": 0.09873658120632171,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09873658120632171,
|
|
"signal/advantage_pre_scale_std": 0.17506541907787324,
|
|
"signal/advantage_std": 0.17506541907787324,
|
|
"signal/brier_reward/centered_abs_mean": 0.1506500333547592,
|
|
"signal/brier_reward/group_std_mean": 0.19419657588005065,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0188312541693449,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0188312541693449,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05950758457183838,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08664509057998657,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007438448071479797,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007438448071479797,
|
|
"signal/format_reward/centered_abs_mean": 0.02527669258415699,
|
|
"signal/format_reward/group_std_mean": 0.0472879096865654,
|
|
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012638346292078495,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012638346292078495,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022916203830391168,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036330488976091145,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.102000239072368e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.102000239072368e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13295071721076965,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18857296407222748,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13295071721076965,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18857296407222748,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13295071721076965,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18857296407222748,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13295071721076965,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18857296407222748,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13295071721076965,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18857296407222748,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13295071721076965,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18857296407222748,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00237981784157455,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.024529390409588812,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03039325512945652,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030661738011986015,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030661738011986015,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21937393087793167,
|
|
"calibration/batch_distribution_entropy": 0.7982473467783474,
|
|
"calibration/buffer_distribution_entropy": 0.7814824550767334,
|
|
"calibration/confidence_entropy": 0.3547674800413071,
|
|
"calibration/coverage@0%": 0.010066104916730503,
|
|
"calibration/coverage@1%": 0.010066104916730503,
|
|
"calibration/coverage@10%": 0.1716406920783105,
|
|
"calibration/coverage@15%": 0.31217969811753826,
|
|
"calibration/coverage@20%": 0.51458351029786,
|
|
"calibration/coverage@25%": 0.6728991822165153,
|
|
"calibration/coverage@30%": 0.8798521895852263,
|
|
"calibration/coverage@5%": 0.05594028905782202,
|
|
"calibration/ece": 0.13592891891689865,
|
|
"calibration/mean_confidence": 0.713633518271023,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015364583333333326,
|
|
"completions/max_length": 3396.2,
|
|
"completions/max_terminated_length": 3396.2,
|
|
"completions/mean_length": 766.4459350585937,
|
|
"completions/mean_terminated_length": 778.4260375976562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.4,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 0.0004305084585212171,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.0145,
|
|
"num_tokens": 268694939.0,
|
|
"reward": 1.0496493101119995,
|
|
"reward_std": 0.1406361937522888,
|
|
"rewards/accuracy_reward": 0.6756076455116272,
|
|
"rewards/brier_reward": 0.800056254863739,
|
|
"rewards/confidence_uniqueness_reward": 0.8977401375770568,
|
|
"rewards/format_reward": 0.9846354126930237,
|
|
"rewards/frontier_aurc_reward": -0.001901687984354794,
|
|
"rewards/frontier_coverage_1": 0.0432504091411829,
|
|
"rewards/frontier_coverage_10": 0.0432504091411829,
|
|
"rewards/frontier_coverage_15": 0.0432504091411829,
|
|
"rewards/frontier_coverage_20": 0.0432504091411829,
|
|
"rewards/frontier_coverage_25": 0.0432504091411829,
|
|
"rewards/frontier_coverage_5": 0.0432504091411829,
|
|
"rewards/frontier_ece_reward": 0.02153747119009495,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16565212607383728,
|
|
"signal/accuracy_reward/group_std_mean": 0.21800636351108552,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37777777910232546,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08282606303691864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08282606303691864,
|
|
"signal/advantage_abs_mean": 0.1018408328294754,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1018408328294754,
|
|
"signal/advantage_pre_scale_std": 0.17831650972366334,
|
|
"signal/advantage_std": 0.17831650972366334,
|
|
"signal/brier_reward/centered_abs_mean": 0.15308336317539215,
|
|
"signal/brier_reward/group_std_mean": 0.19842869639396668,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019135420396924018,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019135420396924018,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0566624753177166,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08453233689069747,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007082809414714575,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007082809414714575,
|
|
"signal/format_reward/centered_abs_mean": 0.0266004778444767,
|
|
"signal/format_reward/group_std_mean": 0.049190875887870786,
|
|
"signal/format_reward/group_zero_std_frac": 0.8055555820465088,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01330023892223835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01330023892223835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002477661520242691,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0040058012586086985,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.435014052432962e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.435014052432962e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14277483373880387,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19749794900417328,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14277483373880387,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19749794900417328,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14277483373880387,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19749794900417328,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14277483373880387,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19749794900417328,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14277483373880387,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.19749794900417328,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14277483373880387,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19749794900417328,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025556694716215135,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023040265217423438,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.028788076341152193,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028800331521779297,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028800331521779297,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21178649539996738,
|
|
"calibration/batch_distribution_entropy": 0.8621768605224684,
|
|
"calibration/buffer_distribution_entropy": 0.7861469496776874,
|
|
"calibration/confidence_entropy": 0.41761171099367456,
|
|
"calibration/coverage@0%": 0.011613063660477454,
|
|
"calibration/coverage@1%": 0.10067556366047745,
|
|
"calibration/coverage@10%": 0.26419098143236075,
|
|
"calibration/coverage@15%": 0.37293739888807675,
|
|
"calibration/coverage@20%": 0.548718324485379,
|
|
"calibration/coverage@25%": 0.6227433237201573,
|
|
"calibration/coverage@30%": 0.7633802420489727,
|
|
"calibration/coverage@5%": 0.19650889699381077,
|
|
"calibration/ece": 0.1498688341904781,
|
|
"calibration/mean_confidence": 0.6908206350495991,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 3772.4,
|
|
"completions/max_terminated_length": 3772.4,
|
|
"completions/mean_length": 758.24306640625,
|
|
"completions/mean_terminated_length": 767.1976684570312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 205.2,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 0.00038663134910166264,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.0098,
|
|
"num_tokens": 280522923.0,
|
|
"reward": 1.0594348669052125,
|
|
"reward_std": 0.12770668268203736,
|
|
"rewards/accuracy_reward": 0.6868055582046508,
|
|
"rewards/brier_reward": 0.8069122552871704,
|
|
"rewards/confidence_uniqueness_reward": 0.9251725912094116,
|
|
"rewards/format_reward": 0.9881944537162781,
|
|
"rewards/frontier_aurc_reward": -0.0012947394163347781,
|
|
"rewards/frontier_coverage_1": 0.033542437851428984,
|
|
"rewards/frontier_coverage_10": 0.033542437851428984,
|
|
"rewards/frontier_coverage_15": 0.033542437851428984,
|
|
"rewards/frontier_coverage_20": 0.033542437851428984,
|
|
"rewards/frontier_coverage_25": 0.033542437851428984,
|
|
"rewards/frontier_coverage_5": 0.033542437851428984,
|
|
"rewards/frontier_ece_reward": 0.014759739115834235,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15372178852558135,
|
|
"signal/accuracy_reward/group_std_mean": 0.20507141947746277,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07686089426279068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07686089426279068,
|
|
"signal/advantage_abs_mean": 0.09132870435714721,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09132870435714721,
|
|
"signal/advantage_pre_scale_std": 0.16071320176124573,
|
|
"signal/advantage_std": 0.16071320176124573,
|
|
"signal/brier_reward/centered_abs_mean": 0.14716649651527405,
|
|
"signal/brier_reward/group_std_mean": 0.19055280685424805,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018395812064409257,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018395812064409257,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038749721646308896,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06294624656438827,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004843715205788612,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004843715205788612,
|
|
"signal/format_reward/centered_abs_mean": 0.02109375,
|
|
"signal/format_reward/group_std_mean": 0.042469137161970136,
|
|
"signal/format_reward/group_zero_std_frac": 0.8166666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013480266556143762,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022415920160710812,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4129677694872952e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4129677694872952e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16463718116283416,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22296231091022492,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16463718116283416,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22296231091022492,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16463718116283416,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22296231091022492,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16463718116283416,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22296231091022492,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16463718116283416,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22296231091022492,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16463718116283416,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22296231091022492,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002947005443274975,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019040508940815926,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02431493140757084,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002380063617601991,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002380063617601991,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15735400009669226,
|
|
"calibration/batch_distribution_entropy": 0.867213508049151,
|
|
"calibration/buffer_distribution_entropy": 0.7960778689126401,
|
|
"calibration/confidence_entropy": 0.43905768456117134,
|
|
"calibration/coverage@0%": 0.031665669247906085,
|
|
"calibration/coverage@1%": 0.031665669247906085,
|
|
"calibration/coverage@10%": 0.36938394329841706,
|
|
"calibration/coverage@15%": 0.577238562485273,
|
|
"calibration/coverage@20%": 0.7212511045241308,
|
|
"calibration/coverage@25%": 0.8655174528200844,
|
|
"calibration/coverage@30%": 0.9547358513805883,
|
|
"calibration/coverage@5%": 0.06221147821476769,
|
|
"calibration/ece": 0.10685238076566583,
|
|
"calibration/mean_confidence": 0.6762039937526211,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014236111111111093,
|
|
"completions/max_length": 3602.8,
|
|
"completions/max_terminated_length": 3602.8,
|
|
"completions/mean_length": 773.11328125,
|
|
"completions/mean_terminated_length": 784.2122314453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 239.6,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 0.00038202741416171193,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0127,
|
|
"num_tokens": 292533412.0,
|
|
"reward": 1.0550405263900757,
|
|
"reward_std": 0.12688146978616716,
|
|
"rewards/accuracy_reward": 0.6841145992279053,
|
|
"rewards/brier_reward": 0.8033087968826294,
|
|
"rewards/confidence_uniqueness_reward": 0.9210085034370422,
|
|
"rewards/format_reward": 0.9856770753860473,
|
|
"rewards/frontier_aurc_reward": -0.0011435442487709225,
|
|
"rewards/frontier_coverage_1": 0.028446093632373957,
|
|
"rewards/frontier_coverage_10": 0.028446093632373957,
|
|
"rewards/frontier_coverage_15": 0.028446093632373957,
|
|
"rewards/frontier_coverage_20": 0.028446093632373957,
|
|
"rewards/frontier_coverage_25": 0.027299534215126188,
|
|
"rewards/frontier_coverage_5": 0.028446093632373957,
|
|
"rewards/frontier_ece_reward": 0.012727185152471066,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1436794728040695,
|
|
"signal/accuracy_reward/group_std_mean": 0.19504518210887908,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07183973640203475,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07183973640203475,
|
|
"signal/advantage_abs_mean": 0.09040538519620896,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09040538519620896,
|
|
"signal/advantage_pre_scale_std": 0.1635303646326065,
|
|
"signal/advantage_std": 0.1635303646326065,
|
|
"signal/brier_reward/centered_abs_mean": 0.14187564551830292,
|
|
"signal/brier_reward/group_std_mean": 0.18244777321815492,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017734455689787865,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017734455689787865,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.041977598518133166,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06911587193608285,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005247199814766646,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005247199814766646,
|
|
"signal/format_reward/centered_abs_mean": 0.02453884594142437,
|
|
"signal/format_reward/group_std_mean": 0.04877747595310211,
|
|
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012269422970712185,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012269422970712185,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011516727041453122,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001979802688583732,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0614940876839682e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0614940876839682e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1493788868188858,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20525516271591188,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1493788868188858,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20525516271591188,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1493788868188858,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20525516271591188,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1493788868188858,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20525516271591188,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14764404296875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20283401310443877,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002642828319221735,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002642828319221735,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1493788868188858,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20525516271591188,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026738820131868126,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016555101424455643,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021539781242609024,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020693876780569554,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020693876780569554,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17760189147420782,
|
|
"calibration/batch_distribution_entropy": 0.8614810747703359,
|
|
"calibration/buffer_distribution_entropy": 0.8124016039305744,
|
|
"calibration/confidence_entropy": 0.4269929605650483,
|
|
"calibration/coverage@0%": 0.04225979049059177,
|
|
"calibration/coverage@1%": 0.04225979049059177,
|
|
"calibration/coverage@10%": 0.29135738789953486,
|
|
"calibration/coverage@15%": 0.5114959489018647,
|
|
"calibration/coverage@20%": 0.6514218042908502,
|
|
"calibration/coverage@25%": 0.7691949016487275,
|
|
"calibration/coverage@30%": 0.8515852075218829,
|
|
"calibration/coverage@5%": 0.07188663538059455,
|
|
"calibration/ece": 0.11239056071129601,
|
|
"calibration/mean_confidence": 0.6774057122905555,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014756944444444442,
|
|
"completions/max_length": 3140.8,
|
|
"completions/max_terminated_length": 3140.8,
|
|
"completions/mean_length": 768.288916015625,
|
|
"completions/mean_terminated_length": 779.8587036132812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 228.8,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 0.0003542336344253272,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0128,
|
|
"num_tokens": 304448708.0,
|
|
"reward": 1.0733104705810548,
|
|
"reward_std": 0.12197275906801223,
|
|
"rewards/accuracy_reward": 0.7184027671813965,
|
|
"rewards/brier_reward": 0.8237447381019593,
|
|
"rewards/confidence_uniqueness_reward": 0.9106087207794189,
|
|
"rewards/format_reward": 0.98515625,
|
|
"rewards/frontier_aurc_reward": -0.0011269306181930006,
|
|
"rewards/frontier_coverage_1": 0.02765751425176859,
|
|
"rewards/frontier_coverage_10": 0.02765751425176859,
|
|
"rewards/frontier_coverage_15": 0.02765751425176859,
|
|
"rewards/frontier_coverage_20": 0.02765751425176859,
|
|
"rewards/frontier_coverage_25": 0.029555964469909667,
|
|
"rewards/frontier_coverage_5": 0.02765751425176859,
|
|
"rewards/frontier_ece_reward": 0.014020322076976299,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1314453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1816246747970581,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06572265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06572265625,
|
|
"signal/advantage_abs_mean": 0.08338052183389663,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08338052183389663,
|
|
"signal/advantage_pre_scale_std": 0.16272049248218537,
|
|
"signal/advantage_std": 0.16272049248218537,
|
|
"signal/brier_reward/centered_abs_mean": 0.12779132276773453,
|
|
"signal/brier_reward/group_std_mean": 0.16802054941654204,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015973915345966816,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015973915345966816,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.046132729202508924,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07453691065311432,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0057665911503136155,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0057665911503136155,
|
|
"signal/format_reward/centered_abs_mean": 0.02614474855363369,
|
|
"signal/format_reward/group_std_mean": 0.05145877227187157,
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013072374276816845,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013072374276816845,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001297543675173074,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022464465117082,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3226030680234545e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3226030680234545e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1275523856282234,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17685183584690095,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1275523856282234,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17685183584690095,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1275523856282234,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17685183584690095,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1275523856282234,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17685183584690095,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09737538546323776,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13730859458446504,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017430193023756147,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017430193023756147,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1275523856282234,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17685183584690095,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022831874433904887,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014937486127018928,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01869678348302841,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001867185765877366,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001867185765877366,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1711177612994187,
|
|
"calibration/batch_distribution_entropy": 0.7979294550533148,
|
|
"calibration/buffer_distribution_entropy": 0.8252856931612047,
|
|
"calibration/confidence_entropy": 0.3867997528995013,
|
|
"calibration/coverage@0%": 0.028857090429138067,
|
|
"calibration/coverage@1%": 0.028857090429138067,
|
|
"calibration/coverage@10%": 0.39941504381293946,
|
|
"calibration/coverage@15%": 0.4653468268344284,
|
|
"calibration/coverage@20%": 0.6041595882856419,
|
|
"calibration/coverage@25%": 0.7318645921120241,
|
|
"calibration/coverage@30%": 0.8313806917894677,
|
|
"calibration/coverage@5%": 0.20213240230711796,
|
|
"calibration/ece": 0.14017483881446374,
|
|
"calibration/mean_confidence": 0.7112044312383862,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010416666666666675,
|
|
"completions/max_length": 3437.8,
|
|
"completions/max_terminated_length": 3437.8,
|
|
"completions/mean_length": 839.7661499023437,
|
|
"completions/mean_terminated_length": 848.5783813476562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 249.6,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 0.00046212406596168876,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.008,
|
|
"num_tokens": 317233150.0,
|
|
"reward": 1.065112328529358,
|
|
"reward_std": 0.12633269131183625,
|
|
"rewards/accuracy_reward": 0.70234375,
|
|
"rewards/brier_reward": 0.8162197589874267,
|
|
"rewards/confidence_uniqueness_reward": 0.8988808989524841,
|
|
"rewards/format_reward": 0.9894965291023254,
|
|
"rewards/frontier_aurc_reward": -0.0015927208121865987,
|
|
"rewards/frontier_coverage_1": 0.03101687040179968,
|
|
"rewards/frontier_coverage_10": 0.03101687040179968,
|
|
"rewards/frontier_coverage_15": 0.03101687040179968,
|
|
"rewards/frontier_coverage_20": 0.031029899418354035,
|
|
"rewards/frontier_coverage_25": 0.0334394596517086,
|
|
"rewards/frontier_coverage_5": 0.03101687040179968,
|
|
"rewards/frontier_ece_reward": 0.011666352301836014,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15368380695581435,
|
|
"signal/accuracy_reward/group_std_mean": 0.1997191309928894,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07684190347790718,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07684190347790718,
|
|
"signal/advantage_abs_mean": 0.09196306616067887,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09196306616067887,
|
|
"signal/advantage_pre_scale_std": 0.165494641661644,
|
|
"signal/advantage_std": 0.165494641661644,
|
|
"signal/brier_reward/centered_abs_mean": 0.13116701394319535,
|
|
"signal/brier_reward/group_std_mean": 0.17333021759986877,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01639587674289942,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01639587674289942,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.046421286463737485,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07029250860214234,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005802660807967186,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005802660807967186,
|
|
"signal/format_reward/centered_abs_mean": 0.01837565079331398,
|
|
"signal/format_reward/group_std_mean": 0.0379862654954195,
|
|
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00918782539665699,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00918782539665699,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020536962430924175,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003437606617808342,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.67611584806582e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.67611584806582e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11324008107185364,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1635311007499695,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020269973436370494,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020269973436370494,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11324008107185364,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1635311007499695,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020269973436370494,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020269973436370494,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11324008107185364,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1635311007499695,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020269973436370494,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020269973436370494,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10615915805101395,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15414920151233674,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019002489047124983,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019002489047124983,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.060843870788812635,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08873464614152908,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001089105277787894,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001089105277787894,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11324008107185364,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1635311007499695,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020269973436370494,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020269973436370494,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01169421263039112,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014634997583925724,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00146177657879889,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00146177657879889,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 0.16175791904643544,
|
|
"eval_calibration/batch_distribution_entropy": 0.7352031129148272,
|
|
"eval_calibration/buffer_distribution_entropy": 0.832478781306993,
|
|
"eval_calibration/confidence_entropy": 0.39508010082462275,
|
|
"eval_calibration/coverage@0%": 0.19808467741935484,
|
|
"eval_calibration/coverage@1%": 0.19808467741935484,
|
|
"eval_calibration/coverage@10%": 0.47211021505376344,
|
|
"eval_calibration/coverage@15%": 0.5816532258064516,
|
|
"eval_calibration/coverage@20%": 0.7856182795698925,
|
|
"eval_calibration/coverage@25%": 0.8489583333333334,
|
|
"eval_calibration/coverage@30%": 0.9010416666666666,
|
|
"eval_calibration/coverage@5%": 0.21370967741935484,
|
|
"eval_calibration/ece": 0.16735424483653735,
|
|
"eval_calibration/mean_confidence": 0.749357968833574,
|
|
"eval_completions/clipped_ratio": 0.006944444444444457,
|
|
"eval_completions/max_length": 2777.6666666666665,
|
|
"eval_completions/max_terminated_length": 2777.6666666666665,
|
|
"eval_completions/mean_length": 823.3238016764323,
|
|
"eval_completions/mean_terminated_length": 829.1297912597656,
|
|
"eval_completions/min_length": 111.33333333333333,
|
|
"eval_completions/min_terminated_length": 290.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 317233150.0,
|
|
"eval_reward": 1.0572884281476338,
|
|
"eval_reward_std": 0.25147593518098194,
|
|
"eval_rewards/accuracy_reward": 0.6996527711550394,
|
|
"eval_rewards/brier_reward": 0.8065233925978342,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8552062610785166,
|
|
"eval_rewards/format_reward": 0.9921875099341074,
|
|
"eval_rewards/frontier_aurc_reward": -0.00195368086375917,
|
|
"eval_rewards/frontier_coverage_1": 0.021662883625443403,
|
|
"eval_rewards/frontier_coverage_10": 0.021662883625443403,
|
|
"eval_rewards/frontier_coverage_15": 0.021662883625443403,
|
|
"eval_rewards/frontier_coverage_20": 0.02181592263514176,
|
|
"eval_rewards/frontier_coverage_25": 0.035129744869967304,
|
|
"eval_rewards/frontier_coverage_5": 0.021662883625443403,
|
|
"eval_rewards/frontier_ece_reward": 0.008933214703574777,
|
|
"eval_runtime": 198.6148,
|
|
"eval_samples_per_second": 5.035,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4021267344554265,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4533983866373698,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20106336722771326,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20106336722771326,
|
|
"eval_signal/advantage_abs_mean": 0.2142608513434728,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2142608513434728,
|
|
"eval_signal/advantage_pre_scale_std": 0.2503946051001549,
|
|
"eval_signal/advantage_std": 0.2503946051001549,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22845095644394556,
|
|
"eval_signal/brier_reward/group_std_mean": 0.28837570548057556,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028556369555493195,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.028556369555493195,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06686499528586864,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09637108817696571,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00835812441073358,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00835812441073358,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.015136718594779571,
|
|
"eval_signal/format_reward/group_std_mean": 0.04419417337824901,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359297389786,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359297389786,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003255114386168619,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0065320210220913095,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.82665494827476e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.82665494827476e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.15939685453971228,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.28508878250916797,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028532035648822784,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028532035648822784,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.15939685453971228,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.28508878250916797,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028532035648822784,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028532035648822784,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.15939685453971228,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.28508878250916797,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028532035648822784,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028532035648822784,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.12750840187072754,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.23698227355877557,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022824003632801273,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022824003632801273,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.07293465360999107,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.12300009404619534,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013055303134024143,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013055303134024143,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.15939685453971228,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.28508878250916797,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028532035648822784,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028532035648822784,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013653319949905077,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.017306591384112835,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017066649937381346,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017066649937381346,
|
|
"eval_steps_per_second": 0.03,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15804363675783265,
|
|
"calibration/batch_distribution_entropy": 0.8399256406752158,
|
|
"calibration/buffer_distribution_entropy": 0.8364387748440253,
|
|
"calibration/confidence_entropy": 0.4143819803032832,
|
|
"calibration/coverage@0%": 0.014081538294168843,
|
|
"calibration/coverage@1%": 0.014081538294168843,
|
|
"calibration/coverage@10%": 0.4197884627652546,
|
|
"calibration/coverage@15%": 0.5361398299340652,
|
|
"calibration/coverage@20%": 0.6640488229008608,
|
|
"calibration/coverage@25%": 0.8362543516100958,
|
|
"calibration/coverage@30%": 0.9181149369016536,
|
|
"calibration/coverage@5%": 0.0701024807025458,
|
|
"calibration/ece": 0.10372717760673841,
|
|
"calibration/mean_confidence": 0.7007285385392891,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.007725694444444442,
|
|
"completions/max_length": 3224.8,
|
|
"completions/max_terminated_length": 3224.8,
|
|
"completions/mean_length": 815.9788208007812,
|
|
"completions/mean_terminated_length": 822.4533203125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 248.4,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.0004315991827752441,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0049,
|
|
"num_tokens": 329740938.0,
|
|
"reward": 1.0920594453811645,
|
|
"reward_std": 0.11862210929393768,
|
|
"rewards/accuracy_reward": 0.7427083373069763,
|
|
"rewards/brier_reward": 0.8469986200332642,
|
|
"rewards/confidence_uniqueness_reward": 0.9098951697349549,
|
|
"rewards/format_reward": 0.9921874880790711,
|
|
"rewards/frontier_aurc_reward": -0.001068349787965417,
|
|
"rewards/frontier_coverage_1": 0.032322213798761365,
|
|
"rewards/frontier_coverage_10": 0.032322213798761365,
|
|
"rewards/frontier_coverage_15": 0.032322213798761365,
|
|
"rewards/frontier_coverage_20": 0.033581113815307616,
|
|
"rewards/frontier_coverage_25": 0.053568636626005174,
|
|
"rewards/frontier_coverage_5": 0.032322213798761365,
|
|
"rewards/frontier_ece_reward": 0.00915743401274085,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15378689169883727,
|
|
"signal/accuracy_reward/group_std_mean": 0.2032044380903244,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07689344584941864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07689344584941864,
|
|
"signal/advantage_abs_mean": 0.08521311953663827,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08521311953663827,
|
|
"signal/advantage_pre_scale_std": 0.15650778114795685,
|
|
"signal/advantage_std": 0.15650778114795685,
|
|
"signal/brier_reward/centered_abs_mean": 0.11616129875183105,
|
|
"signal/brier_reward/group_std_mean": 0.1569095641374588,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014520162343978881,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014520162343978881,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03967732265591621,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05846917554736138,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004959665331989526,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004959665331989526,
|
|
"signal/format_reward/centered_abs_mean": 0.013953992887400091,
|
|
"signal/format_reward/group_std_mean": 0.02798333503305912,
|
|
"signal/format_reward/group_zero_std_frac": 0.8805555462837219,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0069769964437000455,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0069769964437000455,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014973450219258667,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027009368874132632,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6802473803400063e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6802473803400063e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12041537314653397,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1706594407558441,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021554350852966307,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021554350852966307,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12041537314653397,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1706594407558441,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021554350852966307,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021554350852966307,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12041537314653397,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1706594407558441,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021554350852966307,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021554350852966307,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08516337871551513,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12315509170293808,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015244244365021586,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015244244365021586,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05519420728087425,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07446658313274383,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009879762423224748,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009879762423224748,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12041537314653397,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1706594407558441,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021554350852966307,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021554350852966307,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008705221116542816,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011256015487015247,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001088152639567852,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001088152639567852,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1396131852381405,
|
|
"calibration/batch_distribution_entropy": 0.8001701977514258,
|
|
"calibration/buffer_distribution_entropy": 0.8432206775920253,
|
|
"calibration/confidence_entropy": 0.42837628908071357,
|
|
"calibration/coverage@0%": 0.038418853892540475,
|
|
"calibration/coverage@1%": 0.038418853892540475,
|
|
"calibration/coverage@10%": 0.6409266970960997,
|
|
"calibration/coverage@15%": 0.7230105375149384,
|
|
"calibration/coverage@20%": 0.7985909217488165,
|
|
"calibration/coverage@25%": 0.8646003898635477,
|
|
"calibration/coverage@30%": 0.8846560846560847,
|
|
"calibration/coverage@5%": 0.18981191832059213,
|
|
"calibration/ece": 0.12949856679199157,
|
|
"calibration/mean_confidence": 0.7311427370325003,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010590277777777768,
|
|
"completions/max_length": 3660.8,
|
|
"completions/max_terminated_length": 3660.8,
|
|
"completions/mean_length": 833.2264892578125,
|
|
"completions/mean_terminated_length": 842.1909423828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 230.8,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 0.00039733736775815487,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0093,
|
|
"num_tokens": 342427003.0,
|
|
"reward": 1.054310917854309,
|
|
"reward_std": 0.12334007620811463,
|
|
"rewards/accuracy_reward": 0.6795138835906982,
|
|
"rewards/brier_reward": 0.8059031248092652,
|
|
"rewards/confidence_uniqueness_reward": 0.9150711178779602,
|
|
"rewards/format_reward": 0.9894097328186036,
|
|
"rewards/frontier_aurc_reward": -0.0016405290691182018,
|
|
"rewards/frontier_coverage_1": 0.03431166112422943,
|
|
"rewards/frontier_coverage_10": 0.03431166112422943,
|
|
"rewards/frontier_coverage_15": 0.03431166112422943,
|
|
"rewards/frontier_coverage_20": 0.031892279908061025,
|
|
"rewards/frontier_coverage_25": 0.05291588976979256,
|
|
"rewards/frontier_coverage_5": 0.03431166112422943,
|
|
"rewards/frontier_ece_reward": 0.006254972610622645,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15179036557674408,
|
|
"signal/accuracy_reward/group_std_mean": 0.19949381947517394,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07589518278837204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07589518278837204,
|
|
"signal/advantage_abs_mean": 0.08981405347585678,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08981405347585678,
|
|
"signal/advantage_pre_scale_std": 0.1621391087770462,
|
|
"signal/advantage_std": 0.1621391087770462,
|
|
"signal/brier_reward/centered_abs_mean": 0.133778178691864,
|
|
"signal/brier_reward/group_std_mean": 0.17374806702136994,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016722272336483,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016722272336483,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03996127396821976,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06208924725651741,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00499515924602747,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00499515924602747,
|
|
"signal/format_reward/centered_abs_mean": 0.01856553815305233,
|
|
"signal/format_reward/group_std_mean": 0.037125248461961746,
|
|
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009282769076526165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009282769076526165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018948239739984274,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033697550650686027,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.39173486281652e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.39173486281652e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1356187418103218,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18712888658046722,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002427575411275029,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002427575411275029,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1356187418103218,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18712888658046722,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002427575411275029,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002427575411275029,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1356187418103218,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18712888658046722,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002427575411275029,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002427575411275029,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08282427489757538,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1169714629650116,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014825545251369477,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014825545251369477,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.060303305834531785,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07883718758821487,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010794291738420725,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010794291738420725,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1356187418103218,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18712888658046722,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002427575411275029,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002427575411275029,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008292005583643913,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010889817215502261,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001036500697955489,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001036500697955489,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1401170386334239,
|
|
"calibration/batch_distribution_entropy": 0.8628322122419168,
|
|
"calibration/buffer_distribution_entropy": 0.8477875709184838,
|
|
"calibration/confidence_entropy": 0.41931734632897494,
|
|
"calibration/coverage@0%": 0.04187046632642211,
|
|
"calibration/coverage@1%": 0.04187046632642211,
|
|
"calibration/coverage@10%": 0.5120449317449931,
|
|
"calibration/coverage@15%": 0.6430785094700009,
|
|
"calibration/coverage@20%": 0.7207332569334908,
|
|
"calibration/coverage@25%": 0.8257768317559318,
|
|
"calibration/coverage@30%": 0.9018360536714092,
|
|
"calibration/coverage@5%": 0.29195404140585296,
|
|
"calibration/ece": 0.12538457953207308,
|
|
"calibration/mean_confidence": 0.6443893716018676,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008680555555555558,
|
|
"completions/max_length": 3490.6,
|
|
"completions/max_terminated_length": 3490.6,
|
|
"completions/mean_length": 873.49384765625,
|
|
"completions/mean_terminated_length": 881.2654296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 242.8,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 0.0004413281276356429,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.007,
|
|
"num_tokens": 355628724.0,
|
|
"reward": 1.0522673010826111,
|
|
"reward_std": 0.11457638144493103,
|
|
"rewards/accuracy_reward": 0.668836796283722,
|
|
"rewards/brier_reward": 0.8068422317504883,
|
|
"rewards/confidence_uniqueness_reward": 0.9226135849952698,
|
|
"rewards/format_reward": 0.9913194417953491,
|
|
"rewards/frontier_aurc_reward": -0.0013329184614121914,
|
|
"rewards/frontier_coverage_1": 0.04676450602710247,
|
|
"rewards/frontier_coverage_10": 0.04676450602710247,
|
|
"rewards/frontier_coverage_15": 0.04676450602710247,
|
|
"rewards/frontier_coverage_20": 0.040921327844262126,
|
|
"rewards/frontier_coverage_25": 0.06650637164711952,
|
|
"rewards/frontier_coverage_5": 0.04676450602710247,
|
|
"rewards/frontier_ece_reward": 0.006077949050813913,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1448296457529068,
|
|
"signal/accuracy_reward/group_std_mean": 0.1931760638952255,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0724148228764534,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0724148228764534,
|
|
"signal/advantage_abs_mean": 0.08313089311122894,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08313089311122894,
|
|
"signal/advantage_pre_scale_std": 0.15007005333900453,
|
|
"signal/advantage_std": 0.15007005333900453,
|
|
"signal/brier_reward/centered_abs_mean": 0.13480945378541948,
|
|
"signal/brier_reward/group_std_mean": 0.17508352398872376,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016851181723177434,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016851181723177434,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03558523468673229,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05330366343259811,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004448154335841536,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004448154335841536,
|
|
"signal/format_reward/centered_abs_mean": 0.014876301772892475,
|
|
"signal/format_reward/group_std_mean": 0.028501024469733238,
|
|
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007438150886446238,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007438150886446238,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014591423678211868,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024731668643653395,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6118645473616196e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6118645473616196e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.155775585770607,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20951978862285614,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027883827686309816,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027883827686309816,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.155775585770607,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20951978862285614,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027883827686309816,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027883827686309816,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.155775585770607,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20951978862285614,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027883827686309816,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027883827686309816,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09291831254959107,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12665492296218872,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016632377402856946,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016632377402856946,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06688660979270936,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08502381294965744,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011972703039646148,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011972703039646148,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.155775585770607,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20951978862285614,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027883827686309816,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027883827686309816,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009020310081541538,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01170970220118761,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011275387601926922,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011275387601926922,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10870616500791086,
|
|
"calibration/batch_distribution_entropy": 0.7903104452510206,
|
|
"calibration/buffer_distribution_entropy": 0.8493497664534088,
|
|
"calibration/confidence_entropy": 0.39392810200666045,
|
|
"calibration/coverage@0%": 0.039693163843954195,
|
|
"calibration/coverage@1%": 0.15010983051062085,
|
|
"calibration/coverage@10%": 0.6249913825702296,
|
|
"calibration/coverage@15%": 0.7163854370584731,
|
|
"calibration/coverage@20%": 0.8282437601072432,
|
|
"calibration/coverage@25%": 0.9081739226033421,
|
|
"calibration/coverage@30%": 0.9605802000879506,
|
|
"calibration/coverage@5%": 0.4011407911307888,
|
|
"calibration/ece": 0.08378010703101495,
|
|
"calibration/mean_confidence": 0.7437848913758066,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008506944444444465,
|
|
"completions/max_length": 3674.0,
|
|
"completions/max_terminated_length": 3674.0,
|
|
"completions/mean_length": 838.0208374023438,
|
|
"completions/mean_terminated_length": 845.2309448242188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 239.8,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 0.0004973475588485599,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0069,
|
|
"num_tokens": 368371908.0,
|
|
"reward": 1.0795949220657348,
|
|
"reward_std": 0.11285789757966995,
|
|
"rewards/accuracy_reward": 0.7223958373069763,
|
|
"rewards/brier_reward": 0.8292520999908447,
|
|
"rewards/confidence_uniqueness_reward": 0.9112018942832947,
|
|
"rewards/format_reward": 0.9914930462837219,
|
|
"rewards/frontier_aurc_reward": -0.001215806626714766,
|
|
"rewards/frontier_coverage_1": 0.028353986889123918,
|
|
"rewards/frontier_coverage_10": 0.028353986889123918,
|
|
"rewards/frontier_coverage_15": 0.028459986671805382,
|
|
"rewards/frontier_coverage_20": 0.033776380494236945,
|
|
"rewards/frontier_coverage_25": 0.0912104532122612,
|
|
"rewards/frontier_coverage_5": 0.028353986889123918,
|
|
"rewards/frontier_ece_reward": 0.006769264675676822,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1389865458011627,
|
|
"signal/accuracy_reward/group_std_mean": 0.1862527459859848,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45555556416511533,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06949327290058135,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06949327290058135,
|
|
"signal/advantage_abs_mean": 0.08184980154037476,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08184980154037476,
|
|
"signal/advantage_pre_scale_std": 0.15188938081264497,
|
|
"signal/advantage_std": 0.15188938081264497,
|
|
"signal/brier_reward/centered_abs_mean": 0.12720216065645218,
|
|
"signal/brier_reward/group_std_mean": 0.1649218052625656,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015900270082056522,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015900270082056522,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03929706513881683,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05663086473941803,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004912133142352104,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004912133142352104,
|
|
"signal/format_reward/centered_abs_mean": 0.014507378544658422,
|
|
"signal/format_reward/group_std_mean": 0.0268052663654089,
|
|
"signal/format_reward/group_zero_std_frac": 0.8916666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007253689272329211,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007253689272329211,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016262418590486049,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029089401010423898,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.910972725658212e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.910972725658212e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13412159085273742,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1826484888792038,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024007763247936966,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024007763247936966,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13412159085273742,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1826484888792038,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024007763247936966,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024007763247936966,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13297670781612397,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1811499148607254,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023802829906344413,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023802829906344413,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07466747760772705,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10277672857046127,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013365477789193392,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013365477789193392,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0674952432513237,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08588269799947738,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012081648223102094,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012081648223102094,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13412159085273742,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1826484888792038,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024007763247936966,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024007763247936966,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008575642108917236,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011006982997059822,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010719552636146545,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010719552636146545,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.11067553128722692,
|
|
"calibration/batch_distribution_entropy": 0.8481618438645689,
|
|
"calibration/buffer_distribution_entropy": 0.8483553201550436,
|
|
"calibration/confidence_entropy": 0.40064739663776583,
|
|
"calibration/coverage@0%": 0.03877597911227154,
|
|
"calibration/coverage@1%": 0.1033262436625361,
|
|
"calibration/coverage@10%": 0.5340208553130162,
|
|
"calibration/coverage@15%": 0.7327274043747142,
|
|
"calibration/coverage@20%": 0.8668559271692431,
|
|
"calibration/coverage@25%": 0.9450903394255874,
|
|
"calibration/coverage@30%": 0.9826005221932114,
|
|
"calibration/coverage@5%": 0.30878133853807876,
|
|
"calibration/ece": 0.0929162421597211,
|
|
"calibration/mean_confidence": 0.6861792539951647,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00859375,
|
|
"completions/max_length": 3590.6,
|
|
"completions/max_terminated_length": 3590.6,
|
|
"completions/mean_length": 870.5375122070312,
|
|
"completions/mean_terminated_length": 878.083056640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 246.8,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 0.00044883930240757763,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.0071,
|
|
"num_tokens": 381508468.0,
|
|
"reward": 1.0745736122131349,
|
|
"reward_std": 0.11894840151071548,
|
|
"rewards/accuracy_reward": 0.7121527671813965,
|
|
"rewards/brier_reward": 0.8242484927177429,
|
|
"rewards/confidence_uniqueness_reward": 0.9156982779502869,
|
|
"rewards/format_reward": 0.9913194417953491,
|
|
"rewards/frontier_aurc_reward": -0.0012379781110212207,
|
|
"rewards/frontier_coverage_1": 0.030451742745935918,
|
|
"rewards/frontier_coverage_10": 0.030451742745935918,
|
|
"rewards/frontier_coverage_15": 0.030320987850427628,
|
|
"rewards/frontier_coverage_20": 0.03499968759715557,
|
|
"rewards/frontier_coverage_25": 0.09922654330730438,
|
|
"rewards/frontier_coverage_5": 0.030451742745935918,
|
|
"rewards/frontier_ece_reward": 0.00628520967438817,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14856770932674407,
|
|
"signal/accuracy_reward/group_std_mean": 0.2042417496442795,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07428385466337203,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07428385466337203,
|
|
"signal/advantage_abs_mean": 0.08372878432273864,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08372878432273864,
|
|
"signal/advantage_pre_scale_std": 0.1532825142145157,
|
|
"signal/advantage_std": 0.1532825142145157,
|
|
"signal/brier_reward/centered_abs_mean": 0.12630099207162857,
|
|
"signal/brier_reward/group_std_mean": 0.16852413713932038,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01578762400895357,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01578762400895357,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037941998615860936,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.055057863146066664,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004742749826982617,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004742749826982617,
|
|
"signal/format_reward/centered_abs_mean": 0.014756944379769266,
|
|
"signal/format_reward/group_std_mean": 0.026993418857455254,
|
|
"signal/format_reward/group_zero_std_frac": 0.8916666626930236,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007378472189884633,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007378472189884633,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017084946855902671,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031451730988919734,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.058205293200444e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.058205293200444e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14232682287693024,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19747399687767028,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025476500391960143,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025476500391960143,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14232682287693024,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19747399687767028,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025476500391960143,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025476500391960143,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14066008031368255,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19527221620082855,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002517815353348851,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002517815353348851,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07579737156629562,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10598736554384232,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013567729154601693,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013567729154601693,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07291418462991714,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09304469972848892,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013051638146862389,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013051638146862389,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14232682287693024,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19747399687767028,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025476500391960143,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025476500391960143,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00866670086979866,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011408805288374424,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010833376087248324,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010833376087248324,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.0961958541907562,
|
|
"calibration/batch_distribution_entropy": 0.8477025009746469,
|
|
"calibration/buffer_distribution_entropy": 0.8498138843428966,
|
|
"calibration/confidence_entropy": 0.4101348990305955,
|
|
"calibration/coverage@0%": 0.088780644337697,
|
|
"calibration/coverage@1%": 0.088780644337697,
|
|
"calibration/coverage@10%": 0.5502983060324453,
|
|
"calibration/coverage@15%": 0.807155930454841,
|
|
"calibration/coverage@20%": 0.916858509725483,
|
|
"calibration/coverage@25%": 0.9773861186549823,
|
|
"calibration/coverage@30%": 0.9979057591623036,
|
|
"calibration/coverage@5%": 0.355199305315759,
|
|
"calibration/ece": 0.08873742643457942,
|
|
"calibration/mean_confidence": 0.7072252329346721,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009548611111111138,
|
|
"completions/max_length": 3770.4,
|
|
"completions/max_terminated_length": 3770.4,
|
|
"completions/mean_length": 837.043408203125,
|
|
"completions/mean_terminated_length": 845.1139404296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 270.4,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 0.00045111271901987493,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.0074,
|
|
"num_tokens": 394251176.0,
|
|
"reward": 1.0717910051345825,
|
|
"reward_std": 0.12240722179412841,
|
|
"rewards/accuracy_reward": 0.7144965171813965,
|
|
"rewards/brier_reward": 0.807321059703827,
|
|
"rewards/confidence_uniqueness_reward": 0.9133202791213989,
|
|
"rewards/format_reward": 0.9904513955116272,
|
|
"rewards/frontier_aurc_reward": -0.0018045842181891203,
|
|
"rewards/frontier_coverage_1": 0.016695484053343534,
|
|
"rewards/frontier_coverage_10": 0.016695484053343534,
|
|
"rewards/frontier_coverage_15": 0.017037773295305668,
|
|
"rewards/frontier_coverage_20": 0.028835199400782587,
|
|
"rewards/frontier_coverage_25": 0.10415665209293365,
|
|
"rewards/frontier_coverage_5": 0.016695484053343534,
|
|
"rewards/frontier_ece_reward": 0.00549684651196003,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15064561367034912,
|
|
"signal/accuracy_reward/group_std_mean": 0.2003714770078659,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07532280683517456,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07532280683517456,
|
|
"signal/advantage_abs_mean": 0.08792258501052856,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08792258501052856,
|
|
"signal/advantage_pre_scale_std": 0.15989961624145507,
|
|
"signal/advantage_std": 0.15989961624145507,
|
|
"signal/brier_reward/centered_abs_mean": 0.13230671286582946,
|
|
"signal/brier_reward/group_std_mean": 0.17412598431110382,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016538339108228682,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016538339108228682,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040982935577631,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06092101261019707,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005122866947203875,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005122866947203875,
|
|
"signal/format_reward/centered_abs_mean": 0.01662326380610466,
|
|
"signal/format_reward/group_std_mean": 0.03184187039732933,
|
|
"signal/format_reward/group_zero_std_frac": 0.8666666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00831163190305233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00831163190305233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023748093051835896,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004061697609722614,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2509083868935704e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2509083868935704e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13885007798671722,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18973132073879242,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024854163639247417,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024854163639247417,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13885007798671722,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18973132073879242,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024854163639247417,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024854163639247417,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13509701192378998,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18487387001514435,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002418236620724201,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002418236620724201,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06975524574518203,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.095186148583889,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001248618890531361,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001248618890531361,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0811617761850357,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.103651861846447,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014527957886457444,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014527957886457444,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13885007798671722,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18973132073879242,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024854163639247417,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024854163639247417,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008750239573419093,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011527445912361146,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010937799466773867,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010937799466773867,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18495376889754953,
|
|
"calibration/batch_distribution_entropy": 0.8803191163454155,
|
|
"calibration/buffer_distribution_entropy": 0.851482679855412,
|
|
"calibration/confidence_entropy": 0.4354021479497231,
|
|
"calibration/coverage@0%": 0.012587749815716776,
|
|
"calibration/coverage@1%": 0.012587749815716776,
|
|
"calibration/coverage@10%": 0.1918354407163473,
|
|
"calibration/coverage@15%": 0.36826092817701916,
|
|
"calibration/coverage@20%": 0.6374964095137642,
|
|
"calibration/coverage@25%": 0.8755843999038427,
|
|
"calibration/coverage@30%": 0.9279373368146213,
|
|
"calibration/coverage@5%": 0.06898461665644785,
|
|
"calibration/ece": 0.14425179951501588,
|
|
"calibration/mean_confidence": 0.6684984209096279,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009461805555555558,
|
|
"completions/max_length": 3679.8,
|
|
"completions/max_terminated_length": 3679.8,
|
|
"completions/mean_length": 818.1920043945313,
|
|
"completions/mean_terminated_length": 826.032080078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 217.8,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.0004495714674703777,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.0085,
|
|
"num_tokens": 406766796.0,
|
|
"reward": 1.0636092185974122,
|
|
"reward_std": 0.12089930176734924,
|
|
"rewards/accuracy_reward": 0.6918402791023255,
|
|
"rewards/brier_reward": 0.8123436450958252,
|
|
"rewards/confidence_uniqueness_reward": 0.925143015384674,
|
|
"rewards/format_reward": 0.9905382037162781,
|
|
"rewards/frontier_aurc_reward": -0.0011275873170234264,
|
|
"rewards/frontier_coverage_1": 0.027901495201513173,
|
|
"rewards/frontier_coverage_10": 0.027901495201513173,
|
|
"rewards/frontier_coverage_15": 0.02855590097606182,
|
|
"rewards/frontier_coverage_20": 0.0363032516092062,
|
|
"rewards/frontier_coverage_25": 0.1104502335190773,
|
|
"rewards/frontier_coverage_5": 0.027901495201513173,
|
|
"rewards/frontier_ece_reward": 0.00494370711967349,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15333116352558135,
|
|
"signal/accuracy_reward/group_std_mean": 0.20526001155376433,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07666558176279067,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07666558176279067,
|
|
"signal/advantage_abs_mean": 0.08671480715274811,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08671480715274811,
|
|
"signal/advantage_pre_scale_std": 0.15482214391231536,
|
|
"signal/advantage_std": 0.15482214391231536,
|
|
"signal/brier_reward/centered_abs_mean": 0.1261175572872162,
|
|
"signal/brier_reward/group_std_mean": 0.164833265542984,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015764694660902023,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015764694660902023,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03562588319182396,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.055293154716491696,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004453235398977995,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004453235398977995,
|
|
"signal/format_reward/centered_abs_mean": 0.016520182229578496,
|
|
"signal/format_reward/group_std_mean": 0.032335417345166206,
|
|
"signal/format_reward/group_zero_std_frac": 0.8638888835906983,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008260091114789248,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008260091114789248,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013634230359457432,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024055395508185027,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4405272415606306e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4405272415606306e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15479598343372344,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20758814811706544,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002770848013460636,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002770848013460636,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15479598343372344,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20758814811706544,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002770848013460636,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002770848013460636,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14861891269683838,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19950321912765503,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002660278417170048,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002660278417170048,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07016772180795669,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09428980499505997,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012560022063553334,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012560022063553334,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07735366076231003,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09949797540903091,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013846305664628744,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013846305664628744,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15479598343372344,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20758814811706544,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002770848013460636,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002770848013460636,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008560269139707088,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011409120261669159,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001070033642463386,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001070033642463386,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15022940629833742,
|
|
"calibration/batch_distribution_entropy": 0.8461052918312866,
|
|
"calibration/buffer_distribution_entropy": 0.8533362490457753,
|
|
"calibration/confidence_entropy": 0.44849927036648723,
|
|
"calibration/coverage@0%": 0.021875,
|
|
"calibration/coverage@1%": 0.021875,
|
|
"calibration/coverage@10%": 0.4367375159578275,
|
|
"calibration/coverage@15%": 0.576325098459507,
|
|
"calibration/coverage@20%": 0.7084946935173707,
|
|
"calibration/coverage@25%": 0.9614501740644037,
|
|
"calibration/coverage@30%": 0.9932291666666668,
|
|
"calibration/coverage@5%": 0.14235813765516317,
|
|
"calibration/ece": 0.1212236417116653,
|
|
"calibration/mean_confidence": 0.6864637556987978,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006249999999999978,
|
|
"completions/max_length": 3390.8,
|
|
"completions/max_terminated_length": 3390.8,
|
|
"completions/mean_length": 811.7388916015625,
|
|
"completions/mean_terminated_length": 816.8151123046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 219.6,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.0004526027769315988,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0038,
|
|
"num_tokens": 419200972.0,
|
|
"reward": 1.086756706237793,
|
|
"reward_std": 0.11896635293960571,
|
|
"rewards/accuracy_reward": 0.7328124880790711,
|
|
"rewards/brier_reward": 0.8254269242286683,
|
|
"rewards/confidence_uniqueness_reward": 0.92621408700943,
|
|
"rewards/format_reward": 0.99375,
|
|
"rewards/frontier_aurc_reward": -0.0012867568526417016,
|
|
"rewards/frontier_coverage_1": 0.01327955424785614,
|
|
"rewards/frontier_coverage_10": 0.01327955424785614,
|
|
"rewards/frontier_coverage_15": 0.01672282423824072,
|
|
"rewards/frontier_coverage_20": 0.0356328509747982,
|
|
"rewards/frontier_coverage_25": 0.13131238967180253,
|
|
"rewards/frontier_coverage_5": 0.01327955424785614,
|
|
"rewards/frontier_ece_reward": 0.004340594261884689,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15990668535232544,
|
|
"signal/accuracy_reward/group_std_mean": 0.21131123900413512,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07995334267616272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07995334267616272,
|
|
"signal/advantage_abs_mean": 0.08612867295742035,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08612867295742035,
|
|
"signal/advantage_pre_scale_std": 0.15164274871349334,
|
|
"signal/advantage_std": 0.15164274871349334,
|
|
"signal/brier_reward/centered_abs_mean": 0.11984222829341888,
|
|
"signal/brier_reward/group_std_mean": 0.15931495130062104,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01498027853667736,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01498027853667736,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0317846491932869,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04874978512525559,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003973081149160862,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003973081149160862,
|
|
"signal/format_reward/centered_abs_mean": 0.011197916697710752,
|
|
"signal/format_reward/group_std_mean": 0.023881056532263755,
|
|
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005598958348855376,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005598958348855376,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016745397355407477,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003021475113928318,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.997425981448032e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.997425981448032e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1465451642870903,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19608235359191895,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002623158413916826,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002623158413916826,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1465451642870903,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19608235359191895,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002623158413916826,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002623158413916826,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13391998708248137,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1800734966993332,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002397167752496898,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002397167752496898,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.062118491530418395,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08400460481643676,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011119209812022746,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011119209812022746,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08528402894735336,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11039264798164368,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015265840804204345,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015265840804204345,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1465451642870903,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19608235359191895,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002623158413916826,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002623158413916826,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00753614604473114,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010234573669731618,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009420182555913925,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009420182555913925,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14937174985204743,
|
|
"calibration/batch_distribution_entropy": 0.8768565334857193,
|
|
"calibration/buffer_distribution_entropy": 0.8564208478703119,
|
|
"calibration/confidence_entropy": 0.4413662764250832,
|
|
"calibration/coverage@0%": 0.041348916887709995,
|
|
"calibration/coverage@1%": 0.08378923519009726,
|
|
"calibration/coverage@10%": 0.40936488333469506,
|
|
"calibration/coverage@15%": 0.5909852478554963,
|
|
"calibration/coverage@20%": 0.6877917325599496,
|
|
"calibration/coverage@25%": 0.794470757533946,
|
|
"calibration/coverage@30%": 0.9077784145987675,
|
|
"calibration/coverage@5%": 0.21624305560365026,
|
|
"calibration/ece": 0.1298338782403467,
|
|
"calibration/mean_confidence": 0.6607892197608017,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011111111111111117,
|
|
"completions/max_length": 3528.2,
|
|
"completions/max_terminated_length": 3528.2,
|
|
"completions/mean_length": 832.9720458984375,
|
|
"completions/mean_terminated_length": 842.4790161132812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 214.2,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.00047915452159941196,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.0095,
|
|
"num_tokens": 431877674.0,
|
|
"reward": 1.0597479343414307,
|
|
"reward_std": 0.12377008944749832,
|
|
"rewards/accuracy_reward": 0.6863715291023255,
|
|
"rewards/brier_reward": 0.8096499562263488,
|
|
"rewards/confidence_uniqueness_reward": 0.9205044031143188,
|
|
"rewards/format_reward": 0.9888020873069763,
|
|
"rewards/frontier_aurc_reward": -0.001665916945785284,
|
|
"rewards/frontier_coverage_1": 0.029197129979729654,
|
|
"rewards/frontier_coverage_10": 0.029197129979729654,
|
|
"rewards/frontier_coverage_15": 0.029610903933644295,
|
|
"rewards/frontier_coverage_20": 0.04197726622223854,
|
|
"rewards/frontier_coverage_25": 0.1404498651623726,
|
|
"rewards/frontier_coverage_5": 0.029197129979729654,
|
|
"rewards/frontier_ece_reward": 0.004466280713677407,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1545193150639534,
|
|
"signal/accuracy_reward/group_std_mean": 0.2042325258255005,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0772596575319767,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0772596575319767,
|
|
"signal/advantage_abs_mean": 0.09082913100719452,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09082913100719452,
|
|
"signal/advantage_pre_scale_std": 0.16084725856781007,
|
|
"signal/advantage_std": 0.16084725856781007,
|
|
"signal/brier_reward/centered_abs_mean": 0.12776512503623963,
|
|
"signal/brier_reward/group_std_mean": 0.16529574990272522,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015970640629529954,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015970640629529954,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03753339871764183,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.056541355699300765,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004691674839705229,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004691674839705229,
|
|
"signal/format_reward/centered_abs_mean": 0.017822265438735486,
|
|
"signal/format_reward/group_std_mean": 0.03260133340954781,
|
|
"signal/format_reward/group_zero_std_frac": 0.8694444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008911132719367743,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008911132719367743,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00225935832131654,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004012473439797759,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.044251254526898e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.044251254526898e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1396041989326477,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18966357111930848,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024989150697365403,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024989150697365403,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1396041989326477,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18966357111930848,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024989150697365403,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024989150697365403,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11660263985395432,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1599918618798256,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002087187208235264,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002087187208235264,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.060534077882766726,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08167311102151871,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010835599503479898,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010835599503479898,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0967460110783577,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12314206212759018,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017317535821348429,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017317535821348429,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1396041989326477,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18966357111930848,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024989150697365403,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024989150697365403,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007368762884289027,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010059486515820027,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009210953605361283,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009210953605361283,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1517635493969927,
|
|
"calibration/batch_distribution_entropy": 0.8032683764135182,
|
|
"calibration/buffer_distribution_entropy": 0.8594278407178504,
|
|
"calibration/confidence_entropy": 0.4228073073041322,
|
|
"calibration/coverage@0%": 0.05823443307656014,
|
|
"calibration/coverage@1%": 0.05823443307656014,
|
|
"calibration/coverage@10%": 0.4063801377610362,
|
|
"calibration/coverage@15%": 0.6229884169428966,
|
|
"calibration/coverage@20%": 0.6913565883420245,
|
|
"calibration/coverage@25%": 0.8279600708198709,
|
|
"calibration/coverage@30%": 0.941512982494816,
|
|
"calibration/coverage@5%": 0.15745867617066028,
|
|
"calibration/ece": 0.12641913652799427,
|
|
"calibration/mean_confidence": 0.7471995840116807,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006770833333333326,
|
|
"completions/max_length": 3694.8,
|
|
"completions/max_terminated_length": 3694.8,
|
|
"completions/mean_length": 800.4954956054687,
|
|
"completions/mean_terminated_length": 805.9992065429688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 240.0,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.0005283089121803641,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0058,
|
|
"num_tokens": 444167190.0,
|
|
"reward": 1.0729887247085572,
|
|
"reward_std": 0.11522095501422883,
|
|
"rewards/accuracy_reward": 0.7047742962837219,
|
|
"rewards/brier_reward": 0.8240418195724487,
|
|
"rewards/confidence_uniqueness_reward": 0.9152790069580078,
|
|
"rewards/format_reward": 0.9932291626930236,
|
|
"rewards/frontier_aurc_reward": -0.0018710391130298376,
|
|
"rewards/frontier_coverage_1": 0.030061314441263677,
|
|
"rewards/frontier_coverage_10": 0.030090765841305257,
|
|
"rewards/frontier_coverage_15": 0.030872286297380924,
|
|
"rewards/frontier_coverage_20": 0.04789535701274872,
|
|
"rewards/frontier_coverage_25": 0.1694835215806961,
|
|
"rewards/frontier_coverage_5": 0.030061314441263677,
|
|
"rewards/frontier_ece_reward": 0.004374683182686567,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1409125432372093,
|
|
"signal/accuracy_reward/group_std_mean": 0.18542096316814421,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.47222223281860354,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07045627161860465,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07045627161860465,
|
|
"signal/advantage_abs_mean": 0.08473498374223709,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08473498374223709,
|
|
"signal/advantage_pre_scale_std": 0.1543968439102173,
|
|
"signal/advantage_std": 0.1543968439102173,
|
|
"signal/brier_reward/centered_abs_mean": 0.11732118874788285,
|
|
"signal/brier_reward/group_std_mean": 0.15466432571411132,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014665148593485356,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014665148593485356,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.035984426736831665,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.054205088317394255,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004498053342103958,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004498053342103958,
|
|
"signal/format_reward/centered_abs_mean": 0.012174479011446238,
|
|
"signal/format_reward/group_std_mean": 0.02487517409026623,
|
|
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006087239505723119,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006087239505723119,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023178313160315154,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004119249107316136,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.148917651036754e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.148917651036754e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10867589861154556,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15358475148677825,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019452984910458327,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019452984910458327,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10854685455560684,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15342499017715455,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019429885549470782,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019429885549470782,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08157736957073211,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11773104816675187,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014602348441258074,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014602348441258074,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.052034994959831236,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07054910808801651,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000931426405441016,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000931426405441016,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10885821878910065,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13929919749498368,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001948562078177929,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001948562078177929,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10867589861154556,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15358475148677825,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019452984910458327,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019452984910458327,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006094504240900278,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00841012941673398,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.008333333395421505,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007618130301125347,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007618130301125347,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 0.20361683297711194,
|
|
"eval_calibration/batch_distribution_entropy": 0.7910456665612545,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8607564757055215,
|
|
"eval_calibration/confidence_entropy": 0.40042139560153805,
|
|
"eval_calibration/coverage@0%": 0.1213037634408602,
|
|
"eval_calibration/coverage@1%": 0.1213037634408602,
|
|
"eval_calibration/coverage@10%": 0.1743951612903226,
|
|
"eval_calibration/coverage@15%": 0.46135752688172044,
|
|
"eval_calibration/coverage@20%": 0.7064852150537634,
|
|
"eval_calibration/coverage@25%": 0.8635752688172044,
|
|
"eval_calibration/coverage@30%": 0.9474126344086021,
|
|
"eval_calibration/coverage@5%": 0.1213037634408602,
|
|
"eval_calibration/ece": 0.17280676072345136,
|
|
"eval_calibration/mean_confidence": 0.7212002863951991,
|
|
"eval_completions/clipped_ratio": 0.005208333333333352,
|
|
"eval_completions/max_length": 2564.1666666666665,
|
|
"eval_completions/max_terminated_length": 2564.1666666666665,
|
|
"eval_completions/mean_length": 822.8699951171875,
|
|
"eval_completions/mean_terminated_length": 827.223876953125,
|
|
"eval_completions/min_length": 89.33333333333333,
|
|
"eval_completions/min_terminated_length": 267.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 444167190.0,
|
|
"eval_reward": 1.059295157591502,
|
|
"eval_reward_std": 0.2571010912458102,
|
|
"eval_rewards/accuracy_reward": 0.6935763955116272,
|
|
"eval_rewards/brier_reward": 0.8161595165729523,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8582899471124014,
|
|
"eval_rewards/format_reward": 0.9921875099341074,
|
|
"eval_rewards/frontier_aurc_reward": -0.0019197222621490557,
|
|
"eval_rewards/frontier_coverage_1": 0.0351586788892746,
|
|
"eval_rewards/frontier_coverage_10": 0.035206587674717106,
|
|
"eval_rewards/frontier_coverage_15": 0.03441356122493744,
|
|
"eval_rewards/frontier_coverage_20": 0.052705912540356316,
|
|
"eval_rewards/frontier_coverage_25": 0.17523299405972162,
|
|
"eval_rewards/frontier_coverage_5": 0.0351586788892746,
|
|
"eval_rewards/frontier_ece_reward": 0.004450828962338467,
|
|
"eval_runtime": 198.5455,
|
|
"eval_samples_per_second": 5.037,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.41162109375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4593026836713155,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.205810546875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.205810546875,
|
|
"eval_signal/advantage_abs_mean": 0.22099632769823074,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22099632769823074,
|
|
"eval_signal/advantage_pre_scale_std": 0.2554586206873258,
|
|
"eval_signal/advantage_std": 0.2554586206873258,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21053502460320792,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2717415342728297,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02631687807540099,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02631687807540099,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06546468411882718,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0933909999827544,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008183085514853397,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008183085514853397,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.015136718594779571,
|
|
"eval_signal/format_reward/group_std_mean": 0.04419417337824901,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359297389786,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359297389786,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0032751309336163104,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007060678792186081,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.862484310152164e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.862484310152164e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1731132542093595,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.2936793069044749,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030987270874902606,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030987270874902606,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.17259238163630167,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.2929122944672902,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003089403461975356,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003089403461975356,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.11876154070099194,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.212093619008859,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002125831456699719,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002125831456699719,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.07741224020719528,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.10848981390396754,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001385679099864016,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001385679099864016,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.22898491968711218,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.27611127495765686,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0040988298909117775,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0040988298909117775,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1731132542093595,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.2936793069044749,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030987270874902606,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030987270874902606,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.008298173546791077,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.013287559927751621,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010372716933488846,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010372716933488846,
|
|
"eval_steps_per_second": 0.03,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15245076106115,
|
|
"calibration/batch_distribution_entropy": 0.7577490647981495,
|
|
"calibration/buffer_distribution_entropy": 0.8615814010175041,
|
|
"calibration/confidence_entropy": 0.370367604405363,
|
|
"calibration/coverage@0%": 0.00835509138381201,
|
|
"calibration/coverage@1%": 0.00835509138381201,
|
|
"calibration/coverage@10%": 0.2857309478647174,
|
|
"calibration/coverage@15%": 0.7050908432970091,
|
|
"calibration/coverage@20%": 0.8035156789611826,
|
|
"calibration/coverage@25%": 0.8801878712529355,
|
|
"calibration/coverage@30%": 0.961588617212322,
|
|
"calibration/coverage@5%": 0.09116869360084283,
|
|
"calibration/ece": 0.12256849527723586,
|
|
"calibration/mean_confidence": 0.7609970584329514,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00659722222222221,
|
|
"completions/max_length": 3460.2,
|
|
"completions/max_terminated_length": 3460.2,
|
|
"completions/mean_length": 824.7370727539062,
|
|
"completions/mean_terminated_length": 830.19814453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 214.4,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 0.000395325681893155,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0053,
|
|
"num_tokens": 456734113.0,
|
|
"reward": 1.1002012491226196,
|
|
"reward_std": 0.11294516026973725,
|
|
"rewards/accuracy_reward": 0.7552083373069763,
|
|
"rewards/brier_reward": 0.8410086750984191,
|
|
"rewards/confidence_uniqueness_reward": 0.9115934729576111,
|
|
"rewards/format_reward": 0.9934027910232544,
|
|
"rewards/frontier_aurc_reward": -0.0013322666753083467,
|
|
"rewards/frontier_coverage_1": 0.01617803443223238,
|
|
"rewards/frontier_coverage_10": 0.016358466073870658,
|
|
"rewards/frontier_coverage_15": 0.022982559585943817,
|
|
"rewards/frontier_coverage_20": 0.05930071547627449,
|
|
"rewards/frontier_coverage_25": 0.22371198534965514,
|
|
"rewards/frontier_coverage_5": 0.01617803443223238,
|
|
"rewards/frontier_ece_reward": 0.003959867171943188,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1392578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1875597894191742,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44722222685813906,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06962890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06962890625,
|
|
"signal/advantage_abs_mean": 0.07913011610507965,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07913011610507965,
|
|
"signal/advantage_pre_scale_std": 0.15015988945960998,
|
|
"signal/advantage_std": 0.15015988945960998,
|
|
"signal/brier_reward/centered_abs_mean": 0.11419829726219177,
|
|
"signal/brier_reward/group_std_mean": 0.1516292631626129,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014274787157773972,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014274787157773972,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03720296248793602,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.058379728347063065,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004650370310992002,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004650370310992002,
|
|
"signal/format_reward/centered_abs_mean": 0.012358940858393907,
|
|
"signal/format_reward/group_std_mean": 0.028991687297821044,
|
|
"signal/format_reward/group_zero_std_frac": 0.8611111283302307,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006179470429196953,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006179470429196953,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001682829950004816,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030279669910669325,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.012265406141523e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.012265406141523e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11965394765138626,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16320410072803498,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021418056450784205,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021418056450784205,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11909585893154144,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16247594058513642,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002131815906614065,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002131815906614065,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08062577843666077,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11165858805179596,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014432014198973776,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014432014198973776,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05757641866803169,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07493609786033631,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010306178824976086,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010306178824976086,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1128468781709671,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1470080941915512,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020199591061100365,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020199591061100365,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11965394765138626,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16320410072803498,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021418056450784205,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021418056450784205,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006321498658508062,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008466892875730991,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.008333333395421505,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007901873323135078,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007901873323135078,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.09685634602876492,
|
|
"calibration/batch_distribution_entropy": 0.7791143614888557,
|
|
"calibration/buffer_distribution_entropy": 0.8611139279672693,
|
|
"calibration/confidence_entropy": 0.38429371665391043,
|
|
"calibration/coverage@0%": 0.022687609075043632,
|
|
"calibration/coverage@1%": 0.022687609075043632,
|
|
"calibration/coverage@10%": 0.618178636717365,
|
|
"calibration/coverage@15%": 0.781235178457132,
|
|
"calibration/coverage@20%": 0.8722135499408289,
|
|
"calibration/coverage@25%": 0.9379210469362288,
|
|
"calibration/coverage@30%": 0.981675392670157,
|
|
"calibration/coverage@5%": 0.44410983252519953,
|
|
"calibration/ece": 0.0883028476897068,
|
|
"calibration/mean_confidence": 0.7646092393666405,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.005353009259259263,
|
|
"completions/max_length": 3607.0,
|
|
"completions/max_terminated_length": 3607.0,
|
|
"completions/mean_length": 825.1263020833334,
|
|
"completions/mean_terminated_length": 829.6256917317709,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 193.33333333333334,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 464303434.0,
|
|
"reward": 1.0698885917663574,
|
|
"reward_std": 0.1222114438811938,
|
|
"rewards/accuracy_reward": 0.6950231591860453,
|
|
"rewards/brier_reward": 0.8242372075716654,
|
|
"rewards/confidence_uniqueness_reward": 0.9139418800671896,
|
|
"rewards/format_reward": 0.9945023059844971,
|
|
"rewards/frontier_aurc_reward": -0.001452996317918102,
|
|
"rewards/frontier_coverage_1": 0.03662515555818876,
|
|
"rewards/frontier_coverage_10": 0.03657694533467293,
|
|
"rewards/frontier_coverage_15": 0.03316311786572138,
|
|
"rewards/frontier_coverage_20": 0.06067184483011564,
|
|
"rewards/frontier_coverage_25": 0.20710508028666177,
|
|
"rewards/frontier_coverage_5": 0.03662515555818876,
|
|
"rewards/frontier_ece_reward": 0.00421436270698905,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15033637235562006,
|
|
"signal/accuracy_reward/group_std_mean": 0.2045428305864334,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40740742286046344,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07516818617781003,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07516818617781003,
|
|
"signal/advantage_abs_mean": 0.08676933993895848,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08676933993895848,
|
|
"signal/advantage_pre_scale_std": 0.15460805098215738,
|
|
"signal/advantage_std": 0.15460805098215738,
|
|
"signal/brier_reward/centered_abs_mean": 0.125543013215065,
|
|
"signal/brier_reward/group_std_mean": 0.16625908513863882,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015692876651883125,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015692876651883125,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03518642236789068,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05552714318037033,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004398302795986335,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004398302795986335,
|
|
"signal/format_reward/centered_abs_mean": 0.0103804978231589,
|
|
"signal/format_reward/group_std_mean": 0.026168825725714367,
|
|
"signal/format_reward/group_zero_std_frac": 0.8657407363255819,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00519024891157945,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00519024891157945,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019372629079346855,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035577377614875636,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.46770048054168e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.46770048054168e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12492906053860982,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17234125236670175,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022362301436563334,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022362301436563334,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12409953524669011,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17126783728599548,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002221381369357308,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002221381369357308,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07909848541021347,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11092762400706609,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00141586281824857,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00141586281824857,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05819156641761462,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0763649841149648,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010416289248193304,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010416289248193304,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12411411106586456,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16261087854703268,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002221642527729273,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002221642527729273,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12492906053860982,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17234125236670175,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022362301436563334,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022362301436563334,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006716146133840084,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009008504450321198,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.004629629664123058,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008395182667300105,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008395182667300105,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.01026152794320996,
|
|
"train_runtime": 40759.4999,
|
|
"train_samples_per_second": 0.368,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 464303434,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|