6365 lines
402 KiB
JSON
6365 lines
402 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.513840768030151,
|
|
"calibration/batch_distribution_entropy": 0.26584757497792166,
|
|
"calibration/confidence_entropy": 0.21558799826726477,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4655645169967362,
|
|
"calibration/mean_confidence": 0.9180227775200323,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020225694444444442,
|
|
"completions/max_length": 3981.4,
|
|
"completions/max_terminated_length": 3981.4,
|
|
"completions/mean_length": 512.44609375,
|
|
"completions/mean_terminated_length": 523.0315673828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.004560694098472595,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.0067,
|
|
"num_tokens": 9017587.0,
|
|
"reward": 0.4770378410816193,
|
|
"reward_std": 0.4403727948665619,
|
|
"rewards/accuracy_reward": 0.26076388359069824,
|
|
"rewards/batch_coverage_0": 0.010037102550268174,
|
|
"rewards/batch_coverage_1": 0.010037102550268174,
|
|
"rewards/batch_coverage_5": 0.014826382603496313,
|
|
"rewards/brier_reward": 0.31144304275512696,
|
|
"rewards/confidence_uniqueness_reward": 0.2842143774032593,
|
|
"rewards/format_reward": 0.5980902671813965,
|
|
"rewards/frontier_aurc_reward": 0.27501711547374724,
|
|
"rewards/frontier_coverage_10": 0.27501711547374724,
|
|
"rewards/frontier_coverage_15": 0.27501711547374724,
|
|
"rewards/frontier_coverage_20": 0.27501711547374724,
|
|
"rewards/frontier_coverage_25": 0.27501711547374724,
|
|
"rewards/frontier_ece_reward": 0.27501711547374724,
|
|
"rewards/frontier_entropy_batch_reward": -0.5708152770996093,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.31002603769302367,
|
|
"signal/accuracy_reward/group_std_mean": 0.3681545317173004,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.09722222462296486,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15501301884651184,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15501301884651184,
|
|
"signal/advantage_abs_mean": 0.3793900489807129,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3793900489807129,
|
|
"signal/advantage_pre_scale_std": 0.446374899148941,
|
|
"signal/advantage_std": 0.446374899148941,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.017992386221885683,
|
|
"signal/batch_coverage_0/group_std_mean": 0.03413466662168503,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0002249048266094178,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0002249048266094178,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.017992386221885683,
|
|
"signal/batch_coverage_1/group_std_mean": 0.03413466662168503,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0002249048266094178,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0002249048266094178,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.021636892482638358,
|
|
"signal/batch_coverage_5/group_std_mean": 0.038276906311511996,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0002704611368244514,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0002704611368244514,
|
|
"signal/brier_reward/centered_abs_mean": 0.3212303102016449,
|
|
"signal/brier_reward/group_std_mean": 0.37363110184669496,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0321230299770832,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0321230299770832,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23388004302978516,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2874164402484894,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023388004675507546,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023388004675507546,
|
|
"signal/format_reward/centered_abs_mean": 0.44241536855697633,
|
|
"signal/format_reward/group_std_mean": 0.4763067066669464,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.22120768427848816,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.22120768427848816,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3114211916923523,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36827362775802613,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3114211916923523,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36827362775802613,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3114211916923523,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36827362775802613,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3114211916923523,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36827362775802613,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3114211916923523,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36827362775802613,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038927650079131127,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3114211916923523,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36827362775802613,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031142120063304902,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031142120063304902,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4527323842048645,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4842097818851471,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04527323916554451,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04527323916554451,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5282848081012392,
|
|
"calibration/batch_distribution_entropy": 0.27169709045908236,
|
|
"calibration/confidence_entropy": 0.2219471870840591,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4804737987051041,
|
|
"calibration/mean_confidence": 0.9188223019076072,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016840277777777767,
|
|
"completions/max_length": 3852.2,
|
|
"completions/max_terminated_length": 3852.2,
|
|
"completions/mean_length": 476.1991455078125,
|
|
"completions/mean_terminated_length": 484.47950439453126,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 15.4,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.003513866802677512,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 0.0026,
|
|
"num_tokens": 17586121.0,
|
|
"reward": 0.5508717119693756,
|
|
"reward_std": 0.41305696964263916,
|
|
"rewards/accuracy_reward": 0.28906250596046446,
|
|
"rewards/batch_coverage_0": 0.005847562197595834,
|
|
"rewards/batch_coverage_1": 0.005847562197595834,
|
|
"rewards/batch_coverage_5": 0.013559611327946187,
|
|
"rewards/brier_reward": 0.35270223021507263,
|
|
"rewards/confidence_uniqueness_reward": 0.3545986533164978,
|
|
"rewards/format_reward": 0.7063368201255799,
|
|
"rewards/frontier_aurc_reward": 0.3056594967842102,
|
|
"rewards/frontier_coverage_10": 0.3056594967842102,
|
|
"rewards/frontier_coverage_15": 0.3056594967842102,
|
|
"rewards/frontier_coverage_20": 0.3056594967842102,
|
|
"rewards/frontier_coverage_25": 0.3056594967842102,
|
|
"rewards/frontier_ece_reward": 0.3056594967842102,
|
|
"rewards/frontier_entropy_batch_reward": -0.6754338145256042,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3150173544883728,
|
|
"signal/accuracy_reward/group_std_mean": 0.3763134777545929,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.06666666828095913,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1575086772441864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1575086772441864,
|
|
"signal/advantage_abs_mean": 0.3442532241344452,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3442532241344452,
|
|
"signal/advantage_pre_scale_std": 0.41874762773513796,
|
|
"signal/advantage_std": 0.41874762773513796,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.014644789323210717,
|
|
"signal/batch_coverage_0/group_std_mean": 0.03030555061995983,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0001830598688684404,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0001830598688684404,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.014644789323210717,
|
|
"signal/batch_coverage_1/group_std_mean": 0.03030555061995983,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0001830598688684404,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0001830598688684404,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.01787478234618902,
|
|
"signal/batch_coverage_5/group_std_mean": 0.034881545975804326,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.00022343478340189905,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.00022343478340189905,
|
|
"signal/brier_reward/centered_abs_mean": 0.31458155512809755,
|
|
"signal/brier_reward/group_std_mean": 0.3693169116973877,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03145815543830395,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03145815543830395,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22111876904964448,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2776468515396118,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02211187668144703,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02211187668144703,
|
|
"signal/format_reward/centered_abs_mean": 0.358349609375,
|
|
"signal/format_reward/group_std_mean": 0.4222680389881134,
|
|
"signal/format_reward/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1791748046875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1791748046875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3109705686569214,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.37028737664222716,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3109705686569214,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.37028737664222716,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3109705686569214,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.37028737664222716,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3109705686569214,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.37028737664222716,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3109705686569214,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.37028737664222716,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003887132229283452,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3109705686569214,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.37028737664222716,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031097057834267618,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031097057834267618,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38306826949119566,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4421080708503723,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03830682709813118,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03830682709813118,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5546289088196792,
|
|
"calibration/batch_distribution_entropy": 0.282616612874583,
|
|
"calibration/confidence_entropy": 0.23622576694424477,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5222009122277959,
|
|
"calibration/mean_confidence": 0.9140306352710589,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012760416666666673,
|
|
"completions/max_length": 3941.2,
|
|
"completions/max_terminated_length": 3941.2,
|
|
"completions/mean_length": 413.34149169921875,
|
|
"completions/mean_terminated_length": 418.70787353515624,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 22.6,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.001919919392094016,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0085,
|
|
"num_tokens": 25449799.0,
|
|
"reward": 0.674442708492279,
|
|
"reward_std": 0.3263914942741394,
|
|
"rewards/accuracy_reward": 0.30798611640930174,
|
|
"rewards/batch_coverage_0": 0.009828866459429264,
|
|
"rewards/batch_coverage_1": 0.009828866459429264,
|
|
"rewards/batch_coverage_5": 0.01845599301159382,
|
|
"rewards/brier_reward": 0.4136672496795654,
|
|
"rewards/confidence_uniqueness_reward": 0.4963163256645203,
|
|
"rewards/format_reward": 0.9253471970558167,
|
|
"rewards/frontier_aurc_reward": 0.3368179976940155,
|
|
"rewards/frontier_coverage_10": 0.3368179976940155,
|
|
"rewards/frontier_coverage_15": 0.3368179976940155,
|
|
"rewards/frontier_coverage_20": 0.3368179976940155,
|
|
"rewards/frontier_coverage_25": 0.3368179976940155,
|
|
"rewards/frontier_ece_reward": 0.3368179976940155,
|
|
"rewards/frontier_entropy_batch_reward": -0.8843168973922729,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3130750894546509,
|
|
"signal/accuracy_reward/group_std_mean": 0.3723655939102173,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0888888917863369,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15653754472732545,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15653754472732545,
|
|
"signal/advantage_abs_mean": 0.26360846757888795,
|
|
"signal/advantage_pre_scale_abs_mean": 0.26360846757888795,
|
|
"signal/advantage_pre_scale_std": 0.3359541893005371,
|
|
"signal/advantage_std": 0.3359541893005371,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.019277606159448624,
|
|
"signal/batch_coverage_0/group_std_mean": 0.03661954514682293,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.00024097008863463998,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.00024097008863463998,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.019277606159448624,
|
|
"signal/batch_coverage_1/group_std_mean": 0.03661954514682293,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.00024097008863463998,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.00024097008863463998,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.0213375274091959,
|
|
"signal/batch_coverage_5/group_std_mean": 0.04047676026821136,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0002667191030923277,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0002667191030923277,
|
|
"signal/brier_reward/centered_abs_mean": 0.2950912415981293,
|
|
"signal/brier_reward/group_std_mean": 0.3476456940174103,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029509123787283896,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.029509123787283896,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1892547070980072,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23724495470523835,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018925471231341362,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018925471231341362,
|
|
"signal/format_reward/centered_abs_mean": 0.1256618916988373,
|
|
"signal/format_reward/group_std_mean": 0.20962986648082732,
|
|
"signal/format_reward/group_zero_std_frac": 0.25277777947485447,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06283094584941865,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.06283094584941865,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3047790229320526,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3611644208431244,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3047790229320526,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3611644208431244,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3047790229320526,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3611644208431244,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3047790229320526,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3611644208431244,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3047790229320526,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3611644208431244,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038097379729151726,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3047790229320526,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3611644208431244,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03047790378332138,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03047790378332138,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18899438977241517,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.29560824632644656,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06944444738328456,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01889943927526474,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01889943927526474,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4804054108367783,
|
|
"calibration/batch_distribution_entropy": 0.40427036512005454,
|
|
"calibration/buffer_distribution_entropy": 0.3089126685339063,
|
|
"calibration/confidence_entropy": 0.3088122348218766,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4038282054785043,
|
|
"calibration/mean_confidence": 0.8826844795036841,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011024305555555558,
|
|
"completions/max_length": 3790.6,
|
|
"completions/max_terminated_length": 3790.6,
|
|
"completions/mean_length": 441.2155456542969,
|
|
"completions/mean_terminated_length": 446.20184326171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 83.0,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.0009271673625335097,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 33646298.0,
|
|
"reward": 0.7545804619789124,
|
|
"reward_std": 0.26256604194641114,
|
|
"rewards/accuracy_reward": 0.42638888359069826,
|
|
"rewards/batch_coverage_0": 0.02317237965762615,
|
|
"rewards/batch_coverage_1": 0.02317237965762615,
|
|
"rewards/batch_coverage_5": 0.0431126669049263,
|
|
"rewards/brier_reward": 0.5432393312454223,
|
|
"rewards/confidence_uniqueness_reward": 0.5931422591209412,
|
|
"rewards/format_reward": 0.9826388835906983,
|
|
"rewards/frontier_aurc_reward": 0.1815725381486118,
|
|
"rewards/frontier_coverage_10": 0.19049288937821984,
|
|
"rewards/frontier_coverage_15": 0.19049288937821984,
|
|
"rewards/frontier_coverage_20": 0.19049288937821984,
|
|
"rewards/frontier_coverage_25": 0.19049288937821984,
|
|
"rewards/frontier_ece_reward": 0.17414389420300722,
|
|
"rewards/frontier_entropy_batch_reward": -0.9389851570129395,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.302484804391861,
|
|
"signal/accuracy_reward/group_std_mean": 0.36863426566123964,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.07500000223517418,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1512424021959305,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1512424021959305,
|
|
"signal/advantage_abs_mean": 0.2110252559185028,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2110252559185028,
|
|
"signal/advantage_pre_scale_std": 0.27054711878299714,
|
|
"signal/advantage_std": 0.27054711878299714,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.029810138419270514,
|
|
"signal/batch_coverage_0/group_std_mean": 0.053014175593853,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.00037262673722580073,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.00037262673722580073,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.029810138419270514,
|
|
"signal/batch_coverage_1/group_std_mean": 0.053014175593853,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.00037262673722580073,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.00037262673722580073,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.03350967057049274,
|
|
"signal/batch_coverage_5/group_std_mean": 0.060439922660589215,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.00041887088445946574,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.00041887088445946574,
|
|
"signal/brier_reward/centered_abs_mean": 0.2665267765522003,
|
|
"signal/brier_reward/group_std_mean": 0.3222979724407196,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02665267772972584,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02665267772972584,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1808074325323105,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.21495160162448884,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01808074340224266,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01808074340224266,
|
|
"signal/format_reward/centered_abs_mean": 0.03156467005610466,
|
|
"signal/format_reward/group_std_mean": 0.06585587337613105,
|
|
"signal/format_reward/group_zero_std_frac": 0.7111111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01578233502805233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01578233502805233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.1286643948405981,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.15596833908930421,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0016083051159512252,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0016083051159512252,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1443668570369482,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1845448948442936,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018045858887489885,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018045858887489885,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1443668570369482,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1845448948442936,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018045858887489885,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018045858887489885,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1443668570369482,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1845448948442936,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018045858887489885,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018045858887489885,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1443668570369482,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1845448948442936,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018045858887489885,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018045858887489885,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.21530763506889344,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.2578270256519318,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.021530764549970625,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.021530764549970625,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10629074722528457,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.20008701980113983,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.26111111640930174,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010629074834287166,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010629074834287166,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3739733238691446,
|
|
"calibration/batch_distribution_entropy": 0.5870144399005015,
|
|
"calibration/buffer_distribution_entropy": 0.3573886982610337,
|
|
"calibration/confidence_entropy": 0.3735786281956436,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.01424802110817942,
|
|
"calibration/coverage@15%": 0.0158311345646438,
|
|
"calibration/coverage@20%": 0.07757255936675461,
|
|
"calibration/coverage@25%": 0.1683377308707124,
|
|
"calibration/coverage@30%": 0.23076923076923075,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.26419371455997814,
|
|
"calibration/mean_confidence": 0.8383060171722525,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0109375,
|
|
"completions/max_length": 3731.0,
|
|
"completions/max_terminated_length": 3731.0,
|
|
"completions/mean_length": 480.8690185546875,
|
|
"completions/mean_terminated_length": 486.18568115234376,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 81.2,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.0008118962869048119,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.006,
|
|
"num_tokens": 42310357.0,
|
|
"reward": 0.8050299048423767,
|
|
"reward_std": 0.2187582403421402,
|
|
"rewards/accuracy_reward": 0.5324652791023254,
|
|
"rewards/batch_coverage_0": 0.052157065644860265,
|
|
"rewards/batch_coverage_1": 0.052157065644860265,
|
|
"rewards/batch_coverage_5": 0.0869151309132576,
|
|
"rewards/brier_reward": 0.6536402463912964,
|
|
"rewards/confidence_uniqueness_reward": 0.711832869052887,
|
|
"rewards/format_reward": 0.98671875,
|
|
"rewards/frontier_aurc_reward": -0.004462929721921682,
|
|
"rewards/frontier_coverage_10": 0.004508028365671634,
|
|
"rewards/frontier_coverage_15": 0.004508028365671634,
|
|
"rewards/frontier_coverage_20": 0.004508028365671634,
|
|
"rewards/frontier_coverage_25": 0.004508028365671634,
|
|
"rewards/frontier_ece_reward": 0.0053163919015787545,
|
|
"rewards/frontier_entropy_batch_reward": -0.942010247707367,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2765733480453491,
|
|
"signal/accuracy_reward/group_std_mean": 0.34388246536254885,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.11111111342906951,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13828667402267455,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13828667402267455,
|
|
"signal/advantage_abs_mean": 0.17129428684711456,
|
|
"signal/advantage_pre_scale_abs_mean": 0.17129428684711456,
|
|
"signal/advantage_pre_scale_std": 0.2308095544576645,
|
|
"signal/advantage_std": 0.2308095544576645,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.04694191887974739,
|
|
"signal/batch_coverage_0/group_std_mean": 0.07194784879684449,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0005867740022949874,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0005867740022949874,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.04694191887974739,
|
|
"signal/batch_coverage_1/group_std_mean": 0.07194784879684449,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0005867740022949874,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0005867740022949874,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.054440264403820035,
|
|
"signal/batch_coverage_5/group_std_mean": 0.0844185248017311,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.000680503307376057,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.000680503307376057,
|
|
"signal/brier_reward/centered_abs_mean": 0.2154507130384445,
|
|
"signal/brier_reward/group_std_mean": 0.26925151944160464,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02154507227241993,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02154507227241993,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11235011965036393,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1403405025601387,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011235012207180262,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011235012207180262,
|
|
"signal/format_reward/centered_abs_mean": 0.023692491091787816,
|
|
"signal/format_reward/group_std_mean": 0.046519874781370166,
|
|
"signal/format_reward/group_zero_std_frac": 0.8055555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011846245545893908,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011846245545893908,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032001953106373547,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004745307052507997,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0002439345698806e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0002439345698806e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.04560846015810967,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.07139405831694604,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005701057554688305,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005701057554688305,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04560846015810967,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07139405831694604,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005701057554688305,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005701057554688305,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04560846015810967,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07139405831694604,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005701057554688305,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005701057554688305,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04560846015810967,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07139405831694604,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005701057554688305,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005701057554688305,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.12563625872135162,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.15056415796279907,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012563626095652581,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012563626095652581,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10224438160657882,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.20047296583652496,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3111111164093018,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010224438272416592,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010224438272416592,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31229524612584975,
|
|
"calibration/batch_distribution_entropy": 0.7004499996754895,
|
|
"calibration/buffer_distribution_entropy": 0.45483465437174503,
|
|
"calibration/confidence_entropy": 0.513617363200894,
|
|
"calibration/coverage@0%": 0.004972375690607734,
|
|
"calibration/coverage@1%": 0.004972375690607734,
|
|
"calibration/coverage@10%": 0.03314917127071823,
|
|
"calibration/coverage@15%": 0.05117873362066472,
|
|
"calibration/coverage@20%": 0.19240765776552665,
|
|
"calibration/coverage@25%": 0.31131295016053917,
|
|
"calibration/coverage@30%": 0.3671140551329149,
|
|
"calibration/coverage@5%": 0.004972375690607734,
|
|
"calibration/ece": 0.1306058569612572,
|
|
"calibration/mean_confidence": 0.7497172677075492,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016927083333333325,
|
|
"completions/max_length": 4073.0,
|
|
"completions/max_terminated_length": 4073.0,
|
|
"completions/mean_length": 562.984375,
|
|
"completions/mean_terminated_length": 572.6265991210937,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 130.0,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.0005453521152958274,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 51905857.0,
|
|
"reward": 0.8408897519111633,
|
|
"reward_std": 0.1912807822227478,
|
|
"rewards/accuracy_reward": 0.5894097328186035,
|
|
"rewards/batch_coverage_0": 0.10398652851581573,
|
|
"rewards/batch_coverage_1": 0.10398652851581573,
|
|
"rewards/batch_coverage_5": 0.14916678071022033,
|
|
"rewards/brier_reward": 0.7172442197799682,
|
|
"rewards/confidence_uniqueness_reward": 0.7191949129104614,
|
|
"rewards/format_reward": 0.9804687619209289,
|
|
"rewards/frontier_aurc_reward": -0.003438209556043148,
|
|
"rewards/frontier_coverage_10": -0.009622192196547984,
|
|
"rewards/frontier_coverage_15": -0.009622192196547984,
|
|
"rewards/frontier_coverage_20": -0.009622192196547984,
|
|
"rewards/frontier_coverage_25": -0.009622192196547984,
|
|
"rewards/frontier_ece_reward": 0.014210501685738564,
|
|
"rewards/frontier_entropy_batch_reward": -0.9305463433265686,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.23632811903953552,
|
|
"signal/accuracy_reward/group_std_mean": 0.29865540862083434,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.1972222238779068,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11816405951976776,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11816405951976776,
|
|
"signal/advantage_abs_mean": 0.14621526300907134,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14621526300907134,
|
|
"signal/advantage_pre_scale_std": 0.2123253583908081,
|
|
"signal/advantage_std": 0.2123253583908081,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.0650339350104332,
|
|
"signal/batch_coverage_0/group_std_mean": 0.08658984303474426,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0008129241992719472,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0008129241992719472,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.0650339350104332,
|
|
"signal/batch_coverage_1/group_std_mean": 0.08658984303474426,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0008129241992719472,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0008129241992719472,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.07703163176774978,
|
|
"signal/batch_coverage_5/group_std_mean": 0.10550259202718734,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0009628954576328397,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0009628954576328397,
|
|
"signal/brier_reward/centered_abs_mean": 0.16051390171051025,
|
|
"signal/brier_reward/group_std_mean": 0.2036646395921707,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016051390767097475,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016051390767097475,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10673338174819946,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1369411513209343,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01067333873361349,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01067333873361349,
|
|
"signal/format_reward/centered_abs_mean": 0.03227538987994194,
|
|
"signal/format_reward/group_std_mean": 0.05808700993657112,
|
|
"signal/format_reward/group_zero_std_frac": 0.7722222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01613769493997097,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01613769493997097,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016680840170010924,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025632956065237524,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0851050066994503e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0851050066994503e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.06366863325238228,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.08629171699285507,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007958578993566334,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007958578993566334,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06366863325238228,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08629171699285507,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007958578993566334,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007958578993566334,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06366863325238228,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08629171699285507,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007958578993566334,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007958578993566334,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06366863325238228,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08629171699285507,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007958578993566334,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007958578993566334,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06942432820796966,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09012031108140946,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00694243311882019,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00694243311882019,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11933754831552505,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.22226664423942566,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.28611111342906953,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0119337547570467,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0119337547570467,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26877965179808044,
|
|
"calibration/batch_distribution_entropy": 0.6958375018204611,
|
|
"calibration/buffer_distribution_entropy": 0.5517312888183341,
|
|
"calibration/confidence_entropy": 0.5662209793829128,
|
|
"calibration/coverage@0%": 0.0031946301247771833,
|
|
"calibration/coverage@1%": 0.0031946301247771833,
|
|
"calibration/coverage@10%": 0.02345290751697197,
|
|
"calibration/coverage@15%": 0.06455144834161955,
|
|
"calibration/coverage@20%": 0.10678191692627097,
|
|
"calibration/coverage@25%": 0.34911989726929393,
|
|
"calibration/coverage@30%": 0.7778451787587186,
|
|
"calibration/coverage@5%": 0.02238907772973793,
|
|
"calibration/ece": 0.07862013511861354,
|
|
"calibration/mean_confidence": 0.6996272744621839,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.019791666666666652,
|
|
"completions/max_length": 4004.6,
|
|
"completions/max_terminated_length": 4004.6,
|
|
"completions/mean_length": 592.697314453125,
|
|
"completions/mean_terminated_length": 604.7694091796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 156.6,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.0006409003981389105,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0119,
|
|
"num_tokens": 61811170.0,
|
|
"reward": 0.8785522818565369,
|
|
"reward_std": 0.18069977462291717,
|
|
"rewards/accuracy_reward": 0.6352430582046509,
|
|
"rewards/batch_coverage_0": 0.13052503764629364,
|
|
"rewards/batch_coverage_1": 0.13052503764629364,
|
|
"rewards/batch_coverage_5": 0.1599203959107399,
|
|
"rewards/brier_reward": 0.7521831035614014,
|
|
"rewards/confidence_uniqueness_reward": 0.7364588737487793,
|
|
"rewards/format_reward": 0.9785590291023254,
|
|
"rewards/frontier_aurc_reward": -0.0027948385570198296,
|
|
"rewards/frontier_coverage_10": -0.019927990157157183,
|
|
"rewards/frontier_coverage_15": -0.019927990157157183,
|
|
"rewards/frontier_coverage_20": -0.019927990157157183,
|
|
"rewards/frontier_coverage_25": -0.019927990157157183,
|
|
"rewards/frontier_ece_reward": 0.00851638736203313,
|
|
"rewards/frontier_entropy_batch_reward": -0.8229541897773742,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20976562201976776,
|
|
"signal/accuracy_reward/group_std_mean": 0.2661799848079681,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.28333333134651184,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10488281100988388,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10488281100988388,
|
|
"signal/advantage_abs_mean": 0.1369797557592392,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1369797557592392,
|
|
"signal/advantage_pre_scale_std": 0.20022883117198945,
|
|
"signal/advantage_std": 0.20022883117198945,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.0739258162677288,
|
|
"signal/batch_coverage_0/group_std_mean": 0.0961204543709755,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0009240727056749165,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0009240727056749165,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.0739258162677288,
|
|
"signal/batch_coverage_1/group_std_mean": 0.0961204543709755,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0009240727056749165,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0009240727056749165,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.08172721415758133,
|
|
"signal/batch_coverage_5/group_std_mean": 0.10734648406505584,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.001021590200252831,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.001021590200252831,
|
|
"signal/brier_reward/centered_abs_mean": 0.1320378601551056,
|
|
"signal/brier_reward/group_std_mean": 0.17155353724956512,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013203786127269268,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013203786127269268,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.13590224981307983,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16518638134002686,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013590225391089917,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013590225391089917,
|
|
"signal/format_reward/centered_abs_mean": 0.03350151963531971,
|
|
"signal/format_reward/group_std_mean": 0.05711895748972893,
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016750759817659854,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016750759817659854,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012007270473986865,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0018108490156009793,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5009087655926124e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5009087655926124e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07640385776758193,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.10052279233932496,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009550482034683227,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009550482034683227,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07640385776758193,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10052279233932496,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009550482034683227,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009550482034683227,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07640385776758193,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10052279233932496,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009550482034683227,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009550482034683227,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07640385776758193,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10052279233932496,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009550482034683227,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009550482034683227,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.046917397528886795,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07008428052067757,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004691739659756422,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004691739659756422,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26900469660758974,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3844991743564606,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.03888888917863369,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02690046988427639,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02690046988427639,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2918666808086499,
|
|
"calibration/batch_distribution_entropy": 0.8318403951170834,
|
|
"calibration/buffer_distribution_entropy": 0.6226201058992261,
|
|
"calibration/confidence_entropy": 0.566416729480696,
|
|
"calibration/coverage@0%": 0.005800670961872395,
|
|
"calibration/coverage@1%": 0.005800670961872395,
|
|
"calibration/coverage@10%": 0.020537513067135554,
|
|
"calibration/coverage@15%": 0.06033638488309197,
|
|
"calibration/coverage@20%": 0.2585049289891395,
|
|
"calibration/coverage@25%": 0.334795433027012,
|
|
"calibration/coverage@30%": 0.4807208784944665,
|
|
"calibration/coverage@5%": 0.005800670961872395,
|
|
"calibration/ece": 0.13819415249258343,
|
|
"calibration/mean_confidence": 0.644685241560562,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017795138888888905,
|
|
"completions/max_length": 3761.8,
|
|
"completions/max_terminated_length": 3761.8,
|
|
"completions/mean_length": 624.34384765625,
|
|
"completions/mean_terminated_length": 635.7164794921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 187.0,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.00047349781380034983,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0127,
|
|
"num_tokens": 72123131.0,
|
|
"reward": 0.9260114431381226,
|
|
"reward_std": 0.1733134239912033,
|
|
"rewards/accuracy_reward": 0.6411458253860474,
|
|
"rewards/batch_coverage_0": 0.13761214762926102,
|
|
"rewards/batch_coverage_1": 0.13761214762926102,
|
|
"rewards/batch_coverage_5": 0.18339109122753144,
|
|
"rewards/brier_reward": 0.7414672374725342,
|
|
"rewards/confidence_uniqueness_reward": 0.9075051188468933,
|
|
"rewards/format_reward": 0.9793402910232544,
|
|
"rewards/frontier_aurc_reward": -0.0025908468291163445,
|
|
"rewards/frontier_coverage_10": -0.03410651609301567,
|
|
"rewards/frontier_coverage_15": -0.03410651609301567,
|
|
"rewards/frontier_coverage_20": -0.03410651609301567,
|
|
"rewards/frontier_coverage_25": -0.03410651609301567,
|
|
"rewards/frontier_ece_reward": -0.007755273941438645,
|
|
"rewards/frontier_entropy_batch_reward": -0.5234831273555756,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18829209804534913,
|
|
"signal/accuracy_reward/group_std_mean": 0.2498236119747162,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.29444444477558135,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09414604902267457,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09414604902267457,
|
|
"signal/advantage_abs_mean": 0.12837716192007065,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12837716192007065,
|
|
"signal/advantage_pre_scale_std": 0.19455285966396332,
|
|
"signal/advantage_std": 0.19455285966396332,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.11938119679689407,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1603047639131546,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0014922650530934333,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0014922650530934333,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.11938119679689407,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1603047639131546,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0014922650530934333,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0014922650530934333,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.13489190638065338,
|
|
"signal/batch_coverage_5/group_std_mean": 0.18068302273750306,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.00168614883441478,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.00168614883441478,
|
|
"signal/brier_reward/centered_abs_mean": 0.1544518440961838,
|
|
"signal/brier_reward/group_std_mean": 0.2000021755695343,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015445184521377086,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015445184521377086,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06350446417927742,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09182434678077697,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006350446399301291,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006350446399301291,
|
|
"signal/format_reward/centered_abs_mean": 0.03394097276031971,
|
|
"signal/format_reward/group_std_mean": 0.05979543775320053,
|
|
"signal/format_reward/group_zero_std_frac": 0.7638888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016970486380159855,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016970486380159855,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014213901478797198,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002241401933133602,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.776737699401565e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.776737699401565e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13013684004545212,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17877933233976365,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016267105238512158,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016267105238512158,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13013684004545212,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17877933233976365,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016267105238512158,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016267105238512158,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13013684004545212,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17877933233976365,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016267105238512158,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016267105238512158,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13013684004545212,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17877933233976365,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016267105238512158,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016267105238512158,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05353532508015633,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09405137300491333,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005353532452136278,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005353532452136278,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4249632477760315,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4863076567649841,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042496326565742495,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042496326565742495,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2284375344184409,
|
|
"calibration/batch_distribution_entropy": 0.9789694635918931,
|
|
"calibration/buffer_distribution_entropy": 0.7069639594619622,
|
|
"calibration/confidence_entropy": 0.5052800856703774,
|
|
"calibration/coverage@0%": 0.013349339041393076,
|
|
"calibration/coverage@1%": 0.013349339041393076,
|
|
"calibration/coverage@10%": 0.028066876862109864,
|
|
"calibration/coverage@15%": 0.13941603507894876,
|
|
"calibration/coverage@20%": 0.35491320569847046,
|
|
"calibration/coverage@25%": 0.7133672559496242,
|
|
"calibration/coverage@30%": 0.9717441099476438,
|
|
"calibration/coverage@5%": 0.016490700297937578,
|
|
"calibration/ece": 0.19922902642220328,
|
|
"calibration/mean_confidence": 0.5600871500476073,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015798611111111117,
|
|
"completions/max_length": 3832.8,
|
|
"completions/max_terminated_length": 3832.8,
|
|
"completions/mean_length": 674.5237915039063,
|
|
"completions/mean_terminated_length": 685.4586303710937,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 208.0,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.00045343354577198625,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.0126,
|
|
"num_tokens": 83028909.0,
|
|
"reward": 0.959083867073059,
|
|
"reward_std": 0.16450012922286988,
|
|
"rewards/accuracy_reward": 0.6482638955116272,
|
|
"rewards/batch_coverage_0": 0.14531267285346985,
|
|
"rewards/batch_coverage_1": 0.14531267285346985,
|
|
"rewards/batch_coverage_5": 0.18149316310882568,
|
|
"rewards/brier_reward": 0.7108132243156433,
|
|
"rewards/confidence_uniqueness_reward": 0.9358696818351746,
|
|
"rewards/format_reward": 0.982899296283722,
|
|
"rewards/frontier_aurc_reward": -0.0024533634074032306,
|
|
"rewards/frontier_coverage_10": -0.05099590886384249,
|
|
"rewards/frontier_coverage_15": -0.05099590886384249,
|
|
"rewards/frontier_coverage_20": -0.05099590886384249,
|
|
"rewards/frontier_coverage_25": -0.05099590886384249,
|
|
"rewards/frontier_ece_reward": -0.004913155583199114,
|
|
"rewards/frontier_entropy_batch_reward": -0.23995730578899382,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19529079794883727,
|
|
"signal/accuracy_reward/group_std_mean": 0.2586113512516022,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.26388888657093046,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09764539897441864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09764539897441864,
|
|
"signal/advantage_abs_mean": 0.12301210165023804,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12301210165023804,
|
|
"signal/advantage_pre_scale_std": 0.1812170535326004,
|
|
"signal/advantage_std": 0.1812170535326004,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.2245740681886673,
|
|
"signal/batch_coverage_0/group_std_mean": 0.28672278523445127,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002807175787165761,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002807175787165761,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.2245740681886673,
|
|
"signal/batch_coverage_1/group_std_mean": 0.28672278523445127,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002807175787165761,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002807175787165761,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.23478643000125884,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2979602515697479,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0029348304495215418,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0029348304495215418,
|
|
"signal/brier_reward/centered_abs_mean": 0.22289091646671294,
|
|
"signal/brier_reward/group_std_mean": 0.2713299959897995,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02228909097611904,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02228909097611904,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03871301300823689,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0633381500840187,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038713010493665934,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038713010493665934,
|
|
"signal/format_reward/centered_abs_mean": 0.02821723148226738,
|
|
"signal/format_reward/group_std_mean": 0.05076800622045994,
|
|
"signal/format_reward/group_zero_std_frac": 0.7972222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01410861574113369,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01410861574113369,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019098359625786543,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031546786427497863,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.387295025982894e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.387295025982894e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2484348773956299,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.32347086668014524,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031054360792040826,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031054360792040826,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2484348773956299,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.32347086668014524,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031054360792040826,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031054360792040826,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2484348773956299,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.32347086668014524,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031054360792040826,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031054360792040826,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2484348773956299,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.32347086668014524,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031054360792040826,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031054360792040826,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07522738426923752,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.11308020800352096,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007522738631814718,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007522738631814718,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3286409616470337,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4049364268779755,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03286409676074982,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03286409676074982,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.42620806875292194,
|
|
"calibration/batch_distribution_entropy": 0.9693127773013401,
|
|
"calibration/buffer_distribution_entropy": 0.7631346993750441,
|
|
"calibration/confidence_entropy": 0.45513342249289657,
|
|
"calibration/coverage@0%": 0.0021333333333333334,
|
|
"calibration/coverage@1%": 0.0021333333333333334,
|
|
"calibration/coverage@10%": 0.0021333333333333334,
|
|
"calibration/coverage@15%": 0.0037333333333333337,
|
|
"calibration/coverage@20%": 0.006933333333333333,
|
|
"calibration/coverage@25%": 0.019205704099821745,
|
|
"calibration/coverage@30%": 0.031472370766488414,
|
|
"calibration/coverage@5%": 0.0021333333333333334,
|
|
"calibration/ece": 0.2263636327138277,
|
|
"calibration/mean_confidence": 0.5856619741065566,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015885416666666652,
|
|
"completions/max_length": 3451.2,
|
|
"completions/max_terminated_length": 3451.2,
|
|
"completions/mean_length": 685.3610229492188,
|
|
"completions/mean_terminated_length": 696.4227172851563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 195.8,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.0004845529329031706,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.014,
|
|
"num_tokens": 94021868.0,
|
|
"reward": 0.9533812642097473,
|
|
"reward_std": 0.16151563227176666,
|
|
"rewards/accuracy_reward": 0.6358506917953491,
|
|
"rewards/batch_coverage_0": 0.19706983268260955,
|
|
"rewards/batch_coverage_1": 0.19706983268260955,
|
|
"rewards/batch_coverage_5": 0.23945788741111756,
|
|
"rewards/brier_reward": 0.7190346479415893,
|
|
"rewards/confidence_uniqueness_reward": 0.9298328995704651,
|
|
"rewards/format_reward": 0.9833333373069764,
|
|
"rewards/frontier_aurc_reward": -0.0026207052171230314,
|
|
"rewards/frontier_coverage_10": -0.02007846850901842,
|
|
"rewards/frontier_coverage_15": -0.02007846850901842,
|
|
"rewards/frontier_coverage_20": -0.02007846850901842,
|
|
"rewards/frontier_coverage_25": -0.02007846850901842,
|
|
"rewards/frontier_ece_reward": 0.010435734037309885,
|
|
"rewards/frontier_entropy_batch_reward": -0.29024423360824586,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18299153745174407,
|
|
"signal/accuracy_reward/group_std_mean": 0.24039433300495147,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31944445371627805,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09149576872587203,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09149576872587203,
|
|
"signal/advantage_abs_mean": 0.12073450684547424,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12073450684547424,
|
|
"signal/advantage_pre_scale_std": 0.1832158923149109,
|
|
"signal/advantage_std": 0.1832158923149109,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.22889325618743897,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2958221137523651,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0028611657209694384,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0028611657209694384,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.22889325618743897,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2958221137523651,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0028611657209694384,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0028611657209694384,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.24371460676193238,
|
|
"signal/batch_coverage_5/group_std_mean": 0.31154937148094175,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0030464326031506062,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0030464326031506062,
|
|
"signal/brier_reward/centered_abs_mean": 0.2291450619697571,
|
|
"signal/brier_reward/group_std_mean": 0.28165445327758787,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022914505377411843,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022914505377411843,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04500762522220612,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07112801149487495,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0045007624663412574,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045007624663412574,
|
|
"signal/format_reward/centered_abs_mean": 0.02735460065305233,
|
|
"signal/format_reward/group_std_mean": 0.049432437121868136,
|
|
"signal/format_reward/group_zero_std_frac": 0.8027777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013677300326526165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013677300326526165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002755277929827571,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004232291178777814,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.44409763783915e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.44409763783915e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2323669195175171,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3120919167995453,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029045864939689636,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029045864939689636,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2323669195175171,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3120919167995453,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029045864939689636,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029045864939689636,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2323669195175171,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3120919167995453,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029045864939689636,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029045864939689636,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2323669195175171,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3120919167995453,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029045864939689636,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029045864939689636,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07308635264635086,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10278299450874329,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007308634743094444,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007308634743094444,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37657630443573,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44498764276504515,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03765763267874718,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03765763267874718,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.24503824336932647,
|
|
"eval_calibration/batch_distribution_entropy": 0.9149101232032124,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7883785345439861,
|
|
"eval_calibration/confidence_entropy": 0.4801741016558328,
|
|
"eval_calibration/coverage@0%": 0.09543010752688173,
|
|
"eval_calibration/coverage@1%": 0.09543010752688173,
|
|
"eval_calibration/coverage@10%": 0.13188844086021506,
|
|
"eval_calibration/coverage@15%": 0.2701612903225807,
|
|
"eval_calibration/coverage@20%": 0.625,
|
|
"eval_calibration/coverage@25%": 0.7259744623655914,
|
|
"eval_calibration/coverage@30%": 0.8570228494623656,
|
|
"eval_calibration/coverage@5%": 0.09543010752688173,
|
|
"eval_calibration/ece": 0.26293745781407957,
|
|
"eval_calibration/mean_confidence": 0.5938010707615454,
|
|
"eval_completions/clipped_ratio": 0.017361111111111088,
|
|
"eval_completions/max_length": 2231.8333333333335,
|
|
"eval_completions/max_terminated_length": 2231.8333333333335,
|
|
"eval_completions/mean_length": 690.0278015136719,
|
|
"eval_completions/mean_terminated_length": 702.1540120442709,
|
|
"eval_completions/min_length": 55.0,
|
|
"eval_completions/min_terminated_length": 259.6666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 94021868.0,
|
|
"eval_reward": 0.8694744308789571,
|
|
"eval_reward_std": 0.24820644656817117,
|
|
"eval_rewards/accuracy_reward": 0.6388888756434122,
|
|
"eval_rewards/batch_coverage_0": -0.02221489946047465,
|
|
"eval_rewards/batch_coverage_1": -0.02221489946047465,
|
|
"eval_rewards/batch_coverage_5": -0.02221489946047465,
|
|
"eval_rewards/brier_reward": 0.7143495082855225,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.878382941087087,
|
|
"eval_rewards/format_reward": 0.980902761220932,
|
|
"eval_rewards/frontier_aurc_reward": -0.0022628166674015424,
|
|
"eval_rewards/frontier_coverage_10": -0.03039093284557263,
|
|
"eval_rewards/frontier_coverage_15": -0.03039093284557263,
|
|
"eval_rewards/frontier_coverage_20": -0.03039093284557263,
|
|
"eval_rewards/frontier_coverage_25": -0.03039093284557263,
|
|
"eval_rewards/frontier_ece_reward": 0.007764855942999323,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.980902761220932,
|
|
"eval_runtime": 210.5934,
|
|
"eval_samples_per_second": 4.748,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4458550363779068,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4789164414008458,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2229275181889534,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2229275181889534,
|
|
"eval_signal/advantage_abs_mean": 0.2139763260881106,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2139763260881106,
|
|
"eval_signal/advantage_pre_scale_std": 0.24738918244838715,
|
|
"eval_signal/advantage_std": 0.24738918244838715,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.25763875246047974,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.37101367115974426,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.003220484397994975,
|
|
"eval_signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.003220484397994975,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.25763875246047974,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.37101367115974426,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.003220484397994975,
|
|
"eval_signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.003220484397994975,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.25763875246047974,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.37101367115974426,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.003220484397994975,
|
|
"eval_signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.003220484397994975,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2526586403449376,
|
|
"eval_signal/brier_reward/group_std_mean": 0.305042306582133,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02526586347570022,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02526586347570022,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06382480263710022,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11009210348129272,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006382480263710022,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006382480263710022,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.03602430565903584,
|
|
"eval_signal/format_reward/group_std_mean": 0.08875375427305698,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.5555555721124014,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.01801215282951792,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.01801215282951792,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0021421636726396778,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.003887783153913915,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6777045907995973e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6777045907995973e-05,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2909838656584422,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4132361908753713,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003637298437145849,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003637298437145849,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2909838656584422,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4132361908753713,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003637298437145849,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003637298437145849,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2909838656584422,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4132361908753713,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003637298437145849,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003637298437145849,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2909838656584422,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.4132361908753713,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003637298437145849,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003637298437145849,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.07043305411934853,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.10171593228975932,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007043305862074097,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007043305862074097,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.03602430565903584,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.08875375427305698,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5555555721124014,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0036024306124697127,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0036024306124697127,
|
|
"eval_steps_per_second": 0.028,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2734907856073491,
|
|
"calibration/batch_distribution_entropy": 0.990600359943647,
|
|
"calibration/buffer_distribution_entropy": 0.8060855096620092,
|
|
"calibration/confidence_entropy": 0.4990627999443043,
|
|
"calibration/coverage@0%": 0.0064308538800154705,
|
|
"calibration/coverage@1%": 0.0064308538800154705,
|
|
"calibration/coverage@10%": 0.05763552424235219,
|
|
"calibration/coverage@15%": 0.15974925934316536,
|
|
"calibration/coverage@20%": 0.4282457791634802,
|
|
"calibration/coverage@25%": 0.5123595505617977,
|
|
"calibration/coverage@30%": 0.5977469262897818,
|
|
"calibration/coverage@5%": 0.0064308538800154705,
|
|
"calibration/ece": 0.2293868463511859,
|
|
"calibration/mean_confidence": 0.5160291099841612,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016666666666666673,
|
|
"completions/max_length": 3609.6,
|
|
"completions/max_terminated_length": 3609.6,
|
|
"completions/mean_length": 740.4568603515625,
|
|
"completions/mean_terminated_length": 753.1831420898437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 175.8,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.00038589793257415295,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.0134,
|
|
"num_tokens": 105632507.0,
|
|
"reward": 0.9653935313224793,
|
|
"reward_std": 0.15435545146465302,
|
|
"rewards/accuracy_reward": 0.6528645873069763,
|
|
"rewards/batch_coverage_0": 0.1779848724603653,
|
|
"rewards/batch_coverage_1": 0.1779848724603653,
|
|
"rewards/batch_coverage_5": 0.20932372510433198,
|
|
"rewards/brier_reward": 0.7137936234474183,
|
|
"rewards/confidence_uniqueness_reward": 0.9372226595878601,
|
|
"rewards/format_reward": 0.9832465171813964,
|
|
"rewards/frontier_aurc_reward": -0.0020943485433235765,
|
|
"rewards/frontier_coverage_10": -0.04616634603589773,
|
|
"rewards/frontier_coverage_15": -0.04616634603589773,
|
|
"rewards/frontier_coverage_20": -0.04616634603589773,
|
|
"rewards/frontier_coverage_25": -0.04616634603589773,
|
|
"rewards/frontier_ece_reward": 0.000497845932841301,
|
|
"rewards/frontier_entropy_batch_reward": -0.22545139193534852,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18496636152267457,
|
|
"signal/accuracy_reward/group_std_mean": 0.24146317541599274,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.32500000596046447,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09248318076133728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09248318076133728,
|
|
"signal/advantage_abs_mean": 0.11645734161138535,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11645734161138535,
|
|
"signal/advantage_pre_scale_std": 0.17366244792938232,
|
|
"signal/advantage_std": 0.17366244792938232,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.2309662103652954,
|
|
"signal/batch_coverage_0/group_std_mean": 0.29108461141586306,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.00288707772269845,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.00288707772269845,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.2309662103652954,
|
|
"signal/batch_coverage_1/group_std_mean": 0.29108461141586306,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.00288707772269845,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.00288707772269845,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.24196322560310363,
|
|
"signal/batch_coverage_5/group_std_mean": 0.30381428003311156,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0030245401430875063,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0030245401430875063,
|
|
"signal/brier_reward/centered_abs_mean": 0.2172221302986145,
|
|
"signal/brier_reward/group_std_mean": 0.26663507223129274,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02172221466898918,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02172221466898918,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.035537094622850415,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.058692584931850436,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035537096671760084,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035537096671760084,
|
|
"signal/format_reward/centered_abs_mean": 0.02611219584941864,
|
|
"signal/format_reward/group_std_mean": 0.047407979518175124,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01305609792470932,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01305609792470932,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015810795594006777,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00255404831841588,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.976349412871059e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.976349412871059e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2621347904205322,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3368204593658447,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032766849733889105,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032766849733889105,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2621347904205322,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3368204593658447,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032766849733889105,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032766849733889105,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2621347904205322,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3368204593658447,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032766849733889105,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032766849733889105,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2621347904205322,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3368204593658447,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032766849733889105,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032766849733889105,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06554573103785515,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09148340672254562,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006554573215544224,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006554573215544224,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31403006315231324,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38922271728515623,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031403007730841634,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031403007730841634,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3275345487836848,
|
|
"calibration/batch_distribution_entropy": 0.9449756038224791,
|
|
"calibration/buffer_distribution_entropy": 0.834595708853868,
|
|
"calibration/confidence_entropy": 0.5129195563846356,
|
|
"calibration/coverage@0%": 0.007983368101048768,
|
|
"calibration/coverage@1%": 0.007983368101048768,
|
|
"calibration/coverage@10%": 0.13203228114452703,
|
|
"calibration/coverage@15%": 0.2050168825938024,
|
|
"calibration/coverage@20%": 0.29055155383008396,
|
|
"calibration/coverage@25%": 0.3794740615735737,
|
|
"calibration/coverage@30%": 0.5310297482837528,
|
|
"calibration/coverage@5%": 0.007983368101048768,
|
|
"calibration/ece": 0.1811614853520707,
|
|
"calibration/mean_confidence": 0.6212859964100084,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017621527777777767,
|
|
"completions/max_length": 3919.0,
|
|
"completions/max_terminated_length": 3919.0,
|
|
"completions/mean_length": 795.82353515625,
|
|
"completions/mean_terminated_length": 810.206396484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 223.6,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.00040276983054354787,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0156,
|
|
"num_tokens": 117896970.0,
|
|
"reward": 0.953042209148407,
|
|
"reward_std": 0.1605542540550232,
|
|
"rewards/accuracy_reward": 0.6355034828186035,
|
|
"rewards/batch_coverage_0": 0.19173536896705629,
|
|
"rewards/batch_coverage_1": 0.19173536896705629,
|
|
"rewards/batch_coverage_5": 0.24326471984386444,
|
|
"rewards/brier_reward": 0.7469355225563049,
|
|
"rewards/confidence_uniqueness_reward": 0.9301149845123291,
|
|
"rewards/format_reward": 0.9822048544883728,
|
|
"rewards/frontier_aurc_reward": -0.002251867623999715,
|
|
"rewards/frontier_coverage_10": -0.008283971901983022,
|
|
"rewards/frontier_coverage_15": -0.008283971901983022,
|
|
"rewards/frontier_coverage_20": -0.008283971901983022,
|
|
"rewards/frontier_coverage_25": -0.008283971901983022,
|
|
"rewards/frontier_ece_reward": 0.00961360651999712,
|
|
"rewards/frontier_entropy_batch_reward": -0.3187023103237152,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18662651777267455,
|
|
"signal/accuracy_reward/group_std_mean": 0.24246540665626526,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3250000089406967,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09331325888633728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09331325888633728,
|
|
"signal/advantage_abs_mean": 0.12090523093938828,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12090523093938828,
|
|
"signal/advantage_pre_scale_std": 0.18442307114601136,
|
|
"signal/advantage_std": 0.18442307114601136,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.18079350888729095,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23635791540145873,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0022599190939217805,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0022599190939217805,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.18079350888729095,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23635791540145873,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0022599190939217805,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0022599190939217805,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.19579915404319764,
|
|
"signal/batch_coverage_5/group_std_mean": 0.25361494719982147,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0024474896024912597,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024474896024912597,
|
|
"signal/brier_reward/centered_abs_mean": 0.18555007874965668,
|
|
"signal/brier_reward/group_std_mean": 0.23304781019687654,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01855500750243664,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01855500750243664,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04171190112829208,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06940498352050781,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004171190271154046,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004171190271154046,
|
|
"signal/format_reward/centered_abs_mean": 0.02979058101773262,
|
|
"signal/format_reward/group_std_mean": 0.05553009361028671,
|
|
"signal/format_reward/group_zero_std_frac": 0.7722222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01489529050886631,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01489529050886631,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001971939392387867,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030786401126533746,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4649243277963252e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4649243277963252e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18704175055027009,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2533830404281616,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00233802180737257,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00233802180737257,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18704175055027009,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2533830404281616,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00233802180737257,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00233802180737257,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18704175055027009,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2533830404281616,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00233802180737257,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00233802180737257,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18704175055027009,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2533830404281616,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00233802180737257,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00233802180737257,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.050172007828950885,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07390657812356949,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050172007642686365,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050172007642686365,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36339402198791504,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42683065533638,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03633940070867538,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03633940070867538,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21806803806256583,
|
|
"calibration/batch_distribution_entropy": 0.9657770590133431,
|
|
"calibration/buffer_distribution_entropy": 0.851602880338957,
|
|
"calibration/confidence_entropy": 0.5136590638889837,
|
|
"calibration/coverage@0%": 0.022063135791320482,
|
|
"calibration/coverage@1%": 0.022063135791320482,
|
|
"calibration/coverage@10%": 0.27162750217580506,
|
|
"calibration/coverage@15%": 0.5194850512485446,
|
|
"calibration/coverage@20%": 0.5918474666286062,
|
|
"calibration/coverage@25%": 0.6691951613244385,
|
|
"calibration/coverage@30%": 0.7363067470702014,
|
|
"calibration/coverage@5%": 0.09697161383295778,
|
|
"calibration/ece": 0.18653090089478847,
|
|
"calibration/mean_confidence": 0.5649809346256233,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015190972222222232,
|
|
"completions/max_length": 3579.2,
|
|
"completions/max_terminated_length": 3579.2,
|
|
"completions/mean_length": 827.180126953125,
|
|
"completions/mean_terminated_length": 839.9402099609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 218.6,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.00037717988016083837,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0129,
|
|
"num_tokens": 130520133.0,
|
|
"reward": 0.9705712556838989,
|
|
"reward_std": 0.14970431923866273,
|
|
"rewards/accuracy_reward": 0.6578993082046509,
|
|
"rewards/batch_coverage_0": 0.19972788393497468,
|
|
"rewards/batch_coverage_1": 0.19972788393497468,
|
|
"rewards/batch_coverage_5": 0.24470700025558473,
|
|
"rewards/brier_reward": 0.7467877745628357,
|
|
"rewards/confidence_uniqueness_reward": 0.9360103368759155,
|
|
"rewards/format_reward": 0.9847222328186035,
|
|
"rewards/frontier_aurc_reward": -0.0018979504937306046,
|
|
"rewards/frontier_coverage_10": -0.020660974085330963,
|
|
"rewards/frontier_coverage_15": -0.020660974085330963,
|
|
"rewards/frontier_coverage_20": -0.020660974085330963,
|
|
"rewards/frontier_coverage_25": -0.020660974085330963,
|
|
"rewards/frontier_ece_reward": 0.006439816157217138,
|
|
"rewards/frontier_entropy_batch_reward": -0.2665859043598175,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1732150584459305,
|
|
"signal/accuracy_reward/group_std_mean": 0.23058123290538787,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3388888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08660752922296525,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08660752922296525,
|
|
"signal/advantage_abs_mean": 0.11045674383640289,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11045674383640289,
|
|
"signal/advantage_pre_scale_std": 0.1725551962852478,
|
|
"signal/advantage_std": 0.1725551962852478,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.18980375230312346,
|
|
"signal/batch_coverage_0/group_std_mean": 0.24478774666786193,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0023725468199700117,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0023725468199700117,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.18980375230312346,
|
|
"signal/batch_coverage_1/group_std_mean": 0.24478774666786193,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0023725468199700117,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0023725468199700117,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.2032455176115036,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2611479640007019,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0025405690539628267,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0025405690539628267,
|
|
"signal/brier_reward/centered_abs_mean": 0.18122467994689942,
|
|
"signal/brier_reward/group_std_mean": 0.2284662663936615,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018122468888759614,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018122468888759614,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03583796471357346,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06023039147257805,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035837964620441197,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035837964620441197,
|
|
"signal/format_reward/centered_abs_mean": 0.02546657994389534,
|
|
"signal/format_reward/group_std_mean": 0.048032527416944505,
|
|
"signal/format_reward/group_zero_std_frac": 0.8027777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01273328997194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01273328997194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015076728072017432,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024621226824820043,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8845910562959035e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8845910562959035e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2094873458147049,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27635043263435366,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026185918133705853,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026185918133705853,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2094873458147049,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27635043263435366,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026185918133705853,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026185918133705853,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2094873458147049,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27635043263435366,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026185918133705853,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026185918133705853,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2094873458147049,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27635043263435366,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026185918133705853,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026185918133705853,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0486338272690773,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0697670891880989,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004863383062183857,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004863383062183857,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3322786569595337,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40324999690055846,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03322786539793014,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03322786539793014,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26774489801203494,
|
|
"calibration/batch_distribution_entropy": 0.978817101215669,
|
|
"calibration/buffer_distribution_entropy": 0.8711736259452524,
|
|
"calibration/confidence_entropy": 0.4967053350098084,
|
|
"calibration/coverage@0%": 0.020868339590123065,
|
|
"calibration/coverage@1%": 0.057329465595485,
|
|
"calibration/coverage@10%": 0.12732303650796384,
|
|
"calibration/coverage@15%": 0.21166113758605407,
|
|
"calibration/coverage@20%": 0.2644490561303471,
|
|
"calibration/coverage@25%": 0.4677621511679247,
|
|
"calibration/coverage@30%": 0.611612226666965,
|
|
"calibration/coverage@5%": 0.08038576586358151,
|
|
"calibration/ece": 0.16713524185845602,
|
|
"calibration/mean_confidence": 0.5546752871046183,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.022048611111111095,
|
|
"completions/max_length": 3771.4,
|
|
"completions/max_terminated_length": 3771.4,
|
|
"completions/mean_length": 849.20849609375,
|
|
"completions/mean_terminated_length": 868.3218994140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.6,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.00036221364280208945,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.0195,
|
|
"num_tokens": 143381159.0,
|
|
"reward": 0.9556066989898682,
|
|
"reward_std": 0.1507127434015274,
|
|
"rewards/accuracy_reward": 0.6329861164093018,
|
|
"rewards/batch_coverage_0": 0.22227927744388581,
|
|
"rewards/batch_coverage_1": 0.22227927744388581,
|
|
"rewards/batch_coverage_5": 0.2565095841884613,
|
|
"rewards/brier_reward": 0.7318851351737976,
|
|
"rewards/confidence_uniqueness_reward": 0.9302015900611877,
|
|
"rewards/format_reward": 0.9777777671813965,
|
|
"rewards/frontier_aurc_reward": -0.0018549226922914385,
|
|
"rewards/frontier_coverage_10": -0.010580170154571533,
|
|
"rewards/frontier_coverage_15": -0.010580170154571533,
|
|
"rewards/frontier_coverage_20": -0.010580170154571533,
|
|
"rewards/frontier_coverage_25": -0.010580170154571533,
|
|
"rewards/frontier_ece_reward": 0.006841949612135067,
|
|
"rewards/frontier_entropy_batch_reward": -0.2487931430339813,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17258029878139497,
|
|
"signal/accuracy_reward/group_std_mean": 0.2257601410150528,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36111111044883726,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08629014939069748,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08629014939069748,
|
|
"signal/advantage_abs_mean": 0.11218917965888978,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11218917965888978,
|
|
"signal/advantage_pre_scale_std": 0.17515319883823394,
|
|
"signal/advantage_std": 0.17515319883823394,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.20355886816978455,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2609287977218628,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0025444858241826297,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0025444858241826297,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.20355886816978455,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2609287977218628,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0025444858241826297,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0025444858241826297,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.21255633533000945,
|
|
"signal/batch_coverage_5/group_std_mean": 0.27067430019378663,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002656954200938344,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002656954200938344,
|
|
"signal/brier_reward/centered_abs_mean": 0.1885166198015213,
|
|
"signal/brier_reward/group_std_mean": 0.23571482598781585,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018851662799715996,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018851662799715996,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04458749443292618,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07214634269475936,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004458749480545521,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004458749480545521,
|
|
"signal/format_reward/centered_abs_mean": 0.03548177108168602,
|
|
"signal/format_reward/group_std_mean": 0.06166907772421837,
|
|
"signal/format_reward/group_zero_std_frac": 0.7611111164093017,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01774088554084301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01774088554084301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014164665713906287,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022935196291655304,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7705832578940318e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7705832578940318e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2339934378862381,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.304482764005661,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002924918010830879,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002924918010830879,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2339934378862381,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.304482764005661,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002924918010830879,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002924918010830879,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2339934378862381,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.304482764005661,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002924918010830879,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002924918010830879,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2339934378862381,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.304482764005661,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002924918010830879,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002924918010830879,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05036986321210861,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06987290233373641,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00503698643296957,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00503698643296957,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32669522166252135,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4000322759151459,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03266952373087406,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03266952373087406,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25090739042372406,
|
|
"calibration/batch_distribution_entropy": 0.9016094424964652,
|
|
"calibration/buffer_distribution_entropy": 0.8801688271412539,
|
|
"calibration/confidence_entropy": 0.46877690867426763,
|
|
"calibration/coverage@0%": 0.017339479440311924,
|
|
"calibration/coverage@1%": 0.017339479440311924,
|
|
"calibration/coverage@10%": 0.20376245594161743,
|
|
"calibration/coverage@15%": 0.2504112495073011,
|
|
"calibration/coverage@20%": 0.433343520520366,
|
|
"calibration/coverage@25%": 0.5564459742085615,
|
|
"calibration/coverage@30%": 0.7246508428898277,
|
|
"calibration/coverage@5%": 0.15206532800428058,
|
|
"calibration/ece": 0.15549924503905505,
|
|
"calibration/mean_confidence": 0.6747166208484092,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017100694444444443,
|
|
"completions/max_length": 3746.2,
|
|
"completions/max_terminated_length": 3746.2,
|
|
"completions/mean_length": 829.651220703125,
|
|
"completions/mean_terminated_length": 844.1653930664063,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 234.8,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.00034723407588899136,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0165,
|
|
"num_tokens": 156003637.0,
|
|
"reward": 0.9848790049552918,
|
|
"reward_std": 0.1475004494190216,
|
|
"rewards/accuracy_reward": 0.6940104246139527,
|
|
"rewards/batch_coverage_0": 0.2558385759592056,
|
|
"rewards/batch_coverage_1": 0.2558385759592056,
|
|
"rewards/batch_coverage_5": 0.3020994126796722,
|
|
"rewards/brier_reward": 0.7776327848434448,
|
|
"rewards/confidence_uniqueness_reward": 0.9272227883338928,
|
|
"rewards/format_reward": 0.9827257037162781,
|
|
"rewards/frontier_aurc_reward": -0.001760008931159973,
|
|
"rewards/frontier_coverage_10": -0.011692027468234301,
|
|
"rewards/frontier_coverage_15": -0.011692027468234301,
|
|
"rewards/frontier_coverage_20": -0.011692027468234301,
|
|
"rewards/frontier_coverage_25": -0.011692027468234301,
|
|
"rewards/frontier_ece_reward": 0.01447356604039669,
|
|
"rewards/frontier_entropy_batch_reward": -0.3498757779598236,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16115993559360503,
|
|
"signal/accuracy_reward/group_std_mean": 0.2164893090724945,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08057996779680252,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08057996779680252,
|
|
"signal/advantage_abs_mean": 0.10589781254529954,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10589781254529954,
|
|
"signal/advantage_pre_scale_std": 0.17677578926086426,
|
|
"signal/advantage_std": 0.17677578926086426,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.17336927056312562,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2256958395242691,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002167115896008909,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002167115896008909,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.17336927056312562,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2256958395242691,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002167115896008909,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002167115896008909,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18682141304016114,
|
|
"signal/batch_coverage_5/group_std_mean": 0.24154105186462402,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002335267560556531,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002335267560556531,
|
|
"signal/brier_reward/centered_abs_mean": 0.16635515093803405,
|
|
"signal/brier_reward/group_std_mean": 0.2119331955909729,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016635514609515668,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016635514609515668,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.043788299709558484,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0722777083516121,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004378830175846815,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004378830175846815,
|
|
"signal/format_reward/centered_abs_mean": 0.03006184920668602,
|
|
"signal/format_reward/group_std_mean": 0.056455048173666,
|
|
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01503092460334301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01503092460334301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019435939844697713,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030415844637900593,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.429492669762112e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.429492669762112e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17181967794895173,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23387707471847535,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021477460162714125,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021477460162714125,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17181967794895173,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23387707471847535,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021477460162714125,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021477460162714125,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17181967794895173,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23387707471847535,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021477460162714125,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021477460162714125,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17181967794895173,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23387707471847535,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021477460162714125,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021477460162714125,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04731270372867584,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06793354153633117,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004731270391494036,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004731270391494036,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35985006093978883,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42621861696243285,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035985006392002104,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035985006392002104,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1976213450869153,
|
|
"calibration/batch_distribution_entropy": 0.9616107677490462,
|
|
"calibration/buffer_distribution_entropy": 0.8866639393669162,
|
|
"calibration/confidence_entropy": 0.4915158828556917,
|
|
"calibration/coverage@0%": 0.021333535611681585,
|
|
"calibration/coverage@1%": 0.021333535611681585,
|
|
"calibration/coverage@10%": 0.2725894345406946,
|
|
"calibration/coverage@15%": 0.5466634128666609,
|
|
"calibration/coverage@20%": 0.6271725649678406,
|
|
"calibration/coverage@25%": 0.7036610448375155,
|
|
"calibration/coverage@30%": 0.8324734089439971,
|
|
"calibration/coverage@5%": 0.12380377432524607,
|
|
"calibration/ece": 0.22536745039132217,
|
|
"calibration/mean_confidence": 0.5809190533319357,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.022135416666666696,
|
|
"completions/max_length": 3927.2,
|
|
"completions/max_terminated_length": 3927.2,
|
|
"completions/mean_length": 849.037158203125,
|
|
"completions/mean_terminated_length": 868.4296508789063,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 198.6,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.0003430581418797374,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0193,
|
|
"num_tokens": 168837825.0,
|
|
"reward": 0.9680354118347168,
|
|
"reward_std": 0.1533115267753601,
|
|
"rewards/accuracy_reward": 0.6552951455116272,
|
|
"rewards/batch_coverage_0": 0.22071827650070192,
|
|
"rewards/batch_coverage_1": 0.22071827650070192,
|
|
"rewards/batch_coverage_5": 0.25190787613391874,
|
|
"rewards/brier_reward": 0.740069842338562,
|
|
"rewards/confidence_uniqueness_reward": 0.9306141138076782,
|
|
"rewards/format_reward": 0.9777777791023254,
|
|
"rewards/frontier_aurc_reward": -0.0016550872707739473,
|
|
"rewards/frontier_coverage_10": -0.02017199695110321,
|
|
"rewards/frontier_coverage_15": -0.02017199695110321,
|
|
"rewards/frontier_coverage_20": -0.02017199695110321,
|
|
"rewards/frontier_coverage_25": -0.02017199695110321,
|
|
"rewards/frontier_ece_reward": 0.006298495561350137,
|
|
"rewards/frontier_entropy_batch_reward": -0.23836829364299775,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17834743857383728,
|
|
"signal/accuracy_reward/group_std_mean": 0.23480430543422698,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.34166666865348816,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08917371928691864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08917371928691864,
|
|
"signal/advantage_abs_mean": 0.11297503858804703,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11297503858804703,
|
|
"signal/advantage_pre_scale_std": 0.1769232213497162,
|
|
"signal/advantage_std": 0.1769232213497162,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.19219822883605958,
|
|
"signal/batch_coverage_0/group_std_mean": 0.24373748004436493,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002402477944269776,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002402477944269776,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.19219822883605958,
|
|
"signal/batch_coverage_1/group_std_mean": 0.24373748004436493,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002402477944269776,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002402477944269776,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.20115534961223602,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2544471651315689,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0025144419632852077,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0025144419632852077,
|
|
"signal/brier_reward/centered_abs_mean": 0.18416757583618165,
|
|
"signal/brier_reward/group_std_mean": 0.23052338063716887,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01841675750911236,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01841675750911236,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04268275275826454,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07017161026597023,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0042682755272835495,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042682755272835495,
|
|
"signal/format_reward/centered_abs_mean": 0.03362630158662796,
|
|
"signal/format_reward/group_std_mean": 0.05970071628689766,
|
|
"signal/format_reward/group_zero_std_frac": 0.7638889074325561,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01681315079331398,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01681315079331398,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00142300168517977,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002324837143532932,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.778752230165992e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.778752230165992e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23152693808078767,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30003886818885805,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028940868563950063,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028940868563950063,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23152693808078767,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30003886818885805,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028940868563950063,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028940868563950063,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23152693808078767,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.30003886818885805,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028940868563950063,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028940868563950063,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23152693808078767,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.30003886818885805,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028940868563950063,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028940868563950063,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04788872003555298,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06640107333660125,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004788872133940458,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004788872133940458,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31162261962890625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3844640016555786,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031162263825535774,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031162263825535774,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20062143503458127,
|
|
"calibration/batch_distribution_entropy": 0.9760764282719704,
|
|
"calibration/buffer_distribution_entropy": 0.89837311528161,
|
|
"calibration/confidence_entropy": 0.48984631022514985,
|
|
"calibration/coverage@0%": 0.03827803314207638,
|
|
"calibration/coverage@1%": 0.03827803314207638,
|
|
"calibration/coverage@10%": 0.22568576345303254,
|
|
"calibration/coverage@15%": 0.31462502529552017,
|
|
"calibration/coverage@20%": 0.427952622441729,
|
|
"calibration/coverage@25%": 0.7450451059064468,
|
|
"calibration/coverage@30%": 0.8856312620676748,
|
|
"calibration/coverage@5%": 0.093387013089766,
|
|
"calibration/ece": 0.17873929361532662,
|
|
"calibration/mean_confidence": 0.5269320314763848,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.019357638888888862,
|
|
"completions/max_length": 3892.4,
|
|
"completions/max_terminated_length": 3892.4,
|
|
"completions/mean_length": 843.2461059570312,
|
|
"completions/mean_terminated_length": 859.9733764648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 224.4,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.000319209648296237,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.0181,
|
|
"num_tokens": 181639220.0,
|
|
"reward": 0.9859793901443481,
|
|
"reward_std": 0.1493854582309723,
|
|
"rewards/accuracy_reward": 0.6875867962837219,
|
|
"rewards/batch_coverage_0": 0.22644222974777223,
|
|
"rewards/batch_coverage_1": 0.22644222974777223,
|
|
"rewards/batch_coverage_5": 0.27131586968898774,
|
|
"rewards/brier_reward": 0.7592910766601563,
|
|
"rewards/confidence_uniqueness_reward": 0.9317811608314515,
|
|
"rewards/format_reward": 0.9805555462837219,
|
|
"rewards/frontier_aurc_reward": -0.00154776640702039,
|
|
"rewards/frontier_coverage_10": -0.024513232393655927,
|
|
"rewards/frontier_coverage_15": -0.024513232393655927,
|
|
"rewards/frontier_coverage_20": -0.024513232393655927,
|
|
"rewards/frontier_coverage_25": -0.024513232393655927,
|
|
"rewards/frontier_ece_reward": 0.007498538121581078,
|
|
"rewards/frontier_entropy_batch_reward": -0.25756355822086335,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17463650107383727,
|
|
"signal/accuracy_reward/group_std_mean": 0.2279823213815689,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36111111044883726,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08731825053691863,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08731825053691863,
|
|
"signal/advantage_abs_mean": 0.10915019810199737,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10915019810199737,
|
|
"signal/advantage_pre_scale_std": 0.17613787949085236,
|
|
"signal/advantage_std": 0.17613787949085236,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.19334738850593566,
|
|
"signal/batch_coverage_0/group_std_mean": 0.24658811688423157,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0024168423376977445,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0024168423376977445,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.19334738850593566,
|
|
"signal/batch_coverage_1/group_std_mean": 0.24658811688423157,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0024168423376977445,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0024168423376977445,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.20597995519638063,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2616792768239975,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002574749616906047,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002574749616906047,
|
|
"signal/brier_reward/centered_abs_mean": 0.17591671347618104,
|
|
"signal/brier_reward/group_std_mean": 0.22121395468711852,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017591670155525208,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017591670155525208,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.043560180068016055,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07251645848155022,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004356018453836441,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004356018453836441,
|
|
"signal/format_reward/centered_abs_mean": 0.03370225727558136,
|
|
"signal/format_reward/group_std_mean": 0.06109722778201103,
|
|
"signal/format_reward/group_zero_std_frac": 0.7611111164093017,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01685112863779068,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01685112863779068,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001522362232208252,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024535955395549535,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.902952863019891e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.902952863019891e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21423524916172026,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28035895228385926,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002677940670400858,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002677940670400858,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21423524916172026,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28035895228385926,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002677940670400858,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002677940670400858,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21423524916172026,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28035895228385926,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002677940670400858,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002677940670400858,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21423524916172026,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28035895228385926,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002677940670400858,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002677940670400858,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.045525376498699185,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06463603004813194,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004552537761628628,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004552537761628628,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3149750053882599,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3879016041755676,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03149750158190727,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03149750158190727,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1358348459990671,
|
|
"calibration/batch_distribution_entropy": 0.9581468004493925,
|
|
"calibration/buffer_distribution_entropy": 0.9058789280429658,
|
|
"calibration/confidence_entropy": 0.4777319399957392,
|
|
"calibration/coverage@0%": 0.06015598582785542,
|
|
"calibration/coverage@1%": 0.06015598582785542,
|
|
"calibration/coverage@10%": 0.49925767380420216,
|
|
"calibration/coverage@15%": 0.6599960720981064,
|
|
"calibration/coverage@20%": 0.7835370688597245,
|
|
"calibration/coverage@25%": 0.8605917683674047,
|
|
"calibration/coverage@30%": 0.910972286834585,
|
|
"calibration/coverage@5%": 0.21968606101582533,
|
|
"calibration/ece": 0.13234316134727322,
|
|
"calibration/mean_confidence": 0.588607916452851,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02265625,
|
|
"completions/max_length": 3791.0,
|
|
"completions/max_terminated_length": 3791.0,
|
|
"completions/mean_length": 794.6526123046875,
|
|
"completions/mean_terminated_length": 813.1401489257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 195.6,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.0003698467626236379,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.019,
|
|
"num_tokens": 193862290.0,
|
|
"reward": 0.9784747719764709,
|
|
"reward_std": 0.15081590712070464,
|
|
"rewards/accuracy_reward": 0.6746527791023255,
|
|
"rewards/batch_coverage_0": 0.2492100864648819,
|
|
"rewards/batch_coverage_1": 0.2492100864648819,
|
|
"rewards/batch_coverage_5": 0.3049799680709839,
|
|
"rewards/brier_reward": 0.7715118646621704,
|
|
"rewards/confidence_uniqueness_reward": 0.9270098686218262,
|
|
"rewards/format_reward": 0.97734375,
|
|
"rewards/frontier_aurc_reward": -0.0014773321105167269,
|
|
"rewards/frontier_coverage_10": -0.0038829758763313294,
|
|
"rewards/frontier_coverage_15": -0.0038829758763313294,
|
|
"rewards/frontier_coverage_20": -0.0038829758763313294,
|
|
"rewards/frontier_coverage_25": -0.0038829758763313294,
|
|
"rewards/frontier_ece_reward": 0.01303372485563159,
|
|
"rewards/frontier_entropy_batch_reward": -0.28508917093276975,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17347005009651184,
|
|
"signal/accuracy_reward/group_std_mean": 0.22811405956745148,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08673502504825592,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08673502504825592,
|
|
"signal/advantage_abs_mean": 0.11174682527780533,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11174682527780533,
|
|
"signal/advantage_pre_scale_std": 0.18068343102931977,
|
|
"signal/advantage_std": 0.18068343102931977,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.18331976234912872,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23555707037448884,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0022914970759302378,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0022914970759302378,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.18331976234912872,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23555707037448884,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0022914970759302378,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0022914970759302378,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.19930742979049682,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2547296941280365,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0024913428351283073,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024913428351283073,
|
|
"signal/brier_reward/centered_abs_mean": 0.16807724833488463,
|
|
"signal/brier_reward/group_std_mean": 0.21393570005893708,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01680772416293621,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01680772416293621,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045764881372451785,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07181711047887802,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004576487932354212,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004576487932354212,
|
|
"signal/format_reward/centered_abs_mean": 0.03446723110973835,
|
|
"signal/format_reward/group_std_mean": 0.05855829939246178,
|
|
"signal/format_reward/group_zero_std_frac": 0.7777777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017233615554869176,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017233615554869176,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015932812355458736,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025266642682254315,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9916015298804267e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9916015298804267e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19937103390693664,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2660366952419281,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024921379517763853,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024921379517763853,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19937103390693664,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2660366952419281,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024921379517763853,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024921379517763853,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19937103390693664,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2660366952419281,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024921379517763853,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024921379517763853,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19937103390693664,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2660366952419281,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024921379517763853,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024921379517763853,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04495742619037628,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06341345757246017,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00449574263766408,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00449574263766408,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3384994626045227,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4110049486160278,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033849946409463885,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033849946409463885,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17216695851229114,
|
|
"calibration/batch_distribution_entropy": 0.986412787620566,
|
|
"calibration/buffer_distribution_entropy": 0.9140113280249063,
|
|
"calibration/confidence_entropy": 0.49123367460238326,
|
|
"calibration/coverage@0%": 0.10162418131383777,
|
|
"calibration/coverage@1%": 0.11320312868225883,
|
|
"calibration/coverage@10%": 0.4708385873361153,
|
|
"calibration/coverage@15%": 0.5998207546036258,
|
|
"calibration/coverage@20%": 0.6566916783622447,
|
|
"calibration/coverage@25%": 0.6960610116207265,
|
|
"calibration/coverage@30%": 0.7465569453021201,
|
|
"calibration/coverage@5%": 0.2433454891943941,
|
|
"calibration/ece": 0.21959701082780061,
|
|
"calibration/mean_confidence": 0.5031970131902879,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017361111111111115,
|
|
"completions/max_length": 3654.4,
|
|
"completions/max_terminated_length": 3654.4,
|
|
"completions/mean_length": 773.315625,
|
|
"completions/mean_terminated_length": 786.9964721679687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 182.0,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 0.0003409326309338212,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0146,
|
|
"num_tokens": 205862566.0,
|
|
"reward": 0.9811223030090332,
|
|
"reward_std": 0.14197579324245452,
|
|
"rewards/accuracy_reward": 0.6666666626930237,
|
|
"rewards/batch_coverage_0": 0.2440422922372818,
|
|
"rewards/batch_coverage_1": 0.2440422922372818,
|
|
"rewards/batch_coverage_5": 0.29159528613090513,
|
|
"rewards/brier_reward": 0.7525448679924012,
|
|
"rewards/confidence_uniqueness_reward": 0.9361937284469605,
|
|
"rewards/format_reward": 0.9824652791023254,
|
|
"rewards/frontier_aurc_reward": -0.0013517854968085885,
|
|
"rewards/frontier_coverage_10": -0.01474130041897297,
|
|
"rewards/frontier_coverage_15": -0.01474130041897297,
|
|
"rewards/frontier_coverage_20": -0.01474130041897297,
|
|
"rewards/frontier_coverage_25": -0.01474130041897297,
|
|
"rewards/frontier_ece_reward": 0.006691838824190199,
|
|
"rewards/frontier_entropy_batch_reward": -0.21978738605976106,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15632595121860504,
|
|
"signal/accuracy_reward/group_std_mean": 0.21273251473903657,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07816297560930252,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07816297560930252,
|
|
"signal/advantage_abs_mean": 0.10196487307548523,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10196487307548523,
|
|
"signal/advantage_pre_scale_std": 0.1657386153936386,
|
|
"signal/advantage_std": 0.1657386153936386,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.18472446501255035,
|
|
"signal/batch_coverage_0/group_std_mean": 0.23518528938293456,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0023090558592230082,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0023090558592230082,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.18472446501255035,
|
|
"signal/batch_coverage_1/group_std_mean": 0.23518528938293456,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0023090558592230082,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0023090558592230082,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1998664140701294,
|
|
"signal/batch_coverage_5/group_std_mean": 0.25343605875968933,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002498330222442746,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002498330222442746,
|
|
"signal/brier_reward/centered_abs_mean": 0.17327214777469635,
|
|
"signal/brier_reward/group_std_mean": 0.21842075288295745,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017327216640114784,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017327216640114784,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038078039139509204,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06582550406455993,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038078039418905974,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038078039418905974,
|
|
"signal/format_reward/centered_abs_mean": 0.02944878488779068,
|
|
"signal/format_reward/group_std_mean": 0.05599236488342285,
|
|
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01472439244389534,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01472439244389534,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012449646135792136,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002002195152454078,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.556205825181678e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.556205825181678e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2244533360004425,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2934238314628601,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00280566681176424,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00280566681176424,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2244533360004425,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2934238314628601,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00280566681176424,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00280566681176424,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2244533360004425,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2934238314628601,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00280566681176424,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00280566681176424,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2244533360004425,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2934238314628601,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00280566681176424,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00280566681176424,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04348489865660667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06056670844554901,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004348490107804537,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004348490107804537,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29913710951805117,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37389096021652224,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029913710430264473,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029913710430264473,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17096899602949367,
|
|
"calibration/batch_distribution_entropy": 0.9765716250357661,
|
|
"calibration/buffer_distribution_entropy": 0.9212345922377718,
|
|
"calibration/confidence_entropy": 0.4909447982660627,
|
|
"calibration/coverage@0%": 0.004777855942667238,
|
|
"calibration/coverage@1%": 0.004777855942667238,
|
|
"calibration/coverage@10%": 0.31318627907191576,
|
|
"calibration/coverage@15%": 0.541647094407127,
|
|
"calibration/coverage@20%": 0.6926650746499783,
|
|
"calibration/coverage@25%": 0.8630578420940627,
|
|
"calibration/coverage@30%": 0.9158785557210753,
|
|
"calibration/coverage@5%": 0.10126970692813755,
|
|
"calibration/ece": 0.1609802425152892,
|
|
"calibration/mean_confidence": 0.5649440377301433,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02378472222222221,
|
|
"completions/max_length": 3727.0,
|
|
"completions/max_terminated_length": 3727.0,
|
|
"completions/mean_length": 769.3474975585938,
|
|
"completions/mean_terminated_length": 788.0228271484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 189.6,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.00037615594919770956,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0192,
|
|
"num_tokens": 217824521.0,
|
|
"reward": 0.9850692868232727,
|
|
"reward_std": 0.1487916111946106,
|
|
"rewards/accuracy_reward": 0.6770833253860473,
|
|
"rewards/batch_coverage_0": 0.26757666766643523,
|
|
"rewards/batch_coverage_1": 0.26757666766643523,
|
|
"rewards/batch_coverage_5": 0.31190499663352966,
|
|
"rewards/brier_reward": 0.7826770901679992,
|
|
"rewards/confidence_uniqueness_reward": 0.9283072233200074,
|
|
"rewards/format_reward": 0.9762152791023254,
|
|
"rewards/frontier_aurc_reward": -0.0013085834216326475,
|
|
"rewards/frontier_coverage_10": 0.011328295804560184,
|
|
"rewards/frontier_coverage_15": 0.011328295804560184,
|
|
"rewards/frontier_coverage_20": 0.011328295804560184,
|
|
"rewards/frontier_coverage_25": 0.011328295804560184,
|
|
"rewards/frontier_ece_reward": 0.013636208139359951,
|
|
"rewards/frontier_entropy_batch_reward": -0.25180360674858093,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1661892354488373,
|
|
"signal/accuracy_reward/group_std_mean": 0.2210302770137787,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08309461772441865,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08309461772441865,
|
|
"signal/advantage_abs_mean": 0.10792450010776519,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10792450010776519,
|
|
"signal/advantage_pre_scale_std": 0.1757599115371704,
|
|
"signal/advantage_std": 0.1757599115371704,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.17572926580905915,
|
|
"signal/batch_coverage_0/group_std_mean": 0.22546787559986115,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002196615794673562,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002196615794673562,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.17572926580905915,
|
|
"signal/batch_coverage_1/group_std_mean": 0.22546787559986115,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002196615794673562,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002196615794673562,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18886646628379822,
|
|
"signal/batch_coverage_5/group_std_mean": 0.24155567288398744,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002360830921679735,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002360830921679735,
|
|
"signal/brier_reward/centered_abs_mean": 0.16022645235061644,
|
|
"signal/brier_reward/group_std_mean": 0.2063124567270279,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01602264493703842,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01602264493703842,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04420771449804306,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07368545606732368,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004420771403238178,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004420771403238178,
|
|
"signal/format_reward/centered_abs_mean": 0.03523220494389534,
|
|
"signal/format_reward/group_std_mean": 0.06334922239184379,
|
|
"signal/format_reward/group_zero_std_frac": 0.7500000119209289,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01761610247194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01761610247194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014667298644781112,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023501884657889604,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.833412352425512e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.833412352425512e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1967997133731842,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26129592061042783,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002459996426478028,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002459996426478028,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1967997133731842,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26129592061042783,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002459996426478028,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002459996426478028,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1967997133731842,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26129592061042783,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002459996426478028,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002459996426478028,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1967997133731842,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26129592061042783,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002459996426478028,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002459996426478028,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04168285354971886,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05891842097043991,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004168285336345434,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004168285336345434,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3096928000450134,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38175403475761416,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03096928186714649,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03096928186714649,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 0.18445899082012837,
|
|
"eval_calibration/batch_distribution_entropy": 0.9108313172231045,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9246354026341649,
|
|
"eval_calibration/confidence_entropy": 0.4888134951231186,
|
|
"eval_calibration/coverage@0%": 0.18649193548387097,
|
|
"eval_calibration/coverage@1%": 0.18649193548387097,
|
|
"eval_calibration/coverage@10%": 0.35131048387096775,
|
|
"eval_calibration/coverage@15%": 0.5582997311827956,
|
|
"eval_calibration/coverage@20%": 0.6580981182795699,
|
|
"eval_calibration/coverage@25%": 0.747983870967742,
|
|
"eval_calibration/coverage@30%": 0.8797043010752689,
|
|
"eval_calibration/coverage@5%": 0.197244623655914,
|
|
"eval_calibration/ece": 0.2507259916379931,
|
|
"eval_calibration/mean_confidence": 0.6016621548689268,
|
|
"eval_completions/clipped_ratio": 0.020833333333333332,
|
|
"eval_completions/max_length": 2518.6666666666665,
|
|
"eval_completions/max_terminated_length": 2518.6666666666665,
|
|
"eval_completions/mean_length": 739.6960245768229,
|
|
"eval_completions/mean_terminated_length": 755.5581665039062,
|
|
"eval_completions/min_length": 63.166666666666664,
|
|
"eval_completions/min_terminated_length": 247.83333333333334,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 217824521.0,
|
|
"eval_reward": 0.8968092302481333,
|
|
"eval_reward_std": 0.25434887657562893,
|
|
"eval_rewards/accuracy_reward": 0.6779513756434122,
|
|
"eval_rewards/batch_coverage_0": 0.010313547061135372,
|
|
"eval_rewards/batch_coverage_1": 0.010313547061135372,
|
|
"eval_rewards/batch_coverage_5": 0.013707367160047093,
|
|
"eval_rewards/brier_reward": 0.7806276381015778,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8719328244527181,
|
|
"eval_rewards/format_reward": 0.9765625,
|
|
"eval_rewards/frontier_aurc_reward": -0.0015237164722445111,
|
|
"eval_rewards/frontier_coverage_10": 0.0022721167964239917,
|
|
"eval_rewards/frontier_coverage_15": 0.0022721167964239917,
|
|
"eval_rewards/frontier_coverage_20": 0.0022721167964239917,
|
|
"eval_rewards/frontier_coverage_25": 0.0022721167964239917,
|
|
"eval_rewards/frontier_ece_reward": 0.01428735147540768,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9765625,
|
|
"eval_runtime": 210.7505,
|
|
"eval_samples_per_second": 4.745,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4211697081724803,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4642757972081502,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21058485408624014,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21058485408624014,
|
|
"eval_signal/advantage_abs_mean": 0.21520965298016867,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21520965298016867,
|
|
"eval_signal/advantage_pre_scale_std": 0.2539440269271533,
|
|
"eval_signal/advantage_std": 0.2539440269271533,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.17860300342241922,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.26767698178688687,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002232537566063305,
|
|
"eval_signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.002232537566063305,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.17860300342241922,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.26767698178688687,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002232537566063305,
|
|
"eval_signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.002232537566063305,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.17452366650104523,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.26059675465027493,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0021815458700681725,
|
|
"eval_signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021815458700681725,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20681633551915488,
|
|
"eval_signal/brier_reward/group_std_mean": 0.26719488948583603,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020681633924444515,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020681633924444515,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07006249266366164,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12485801925261815,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007006249623373151,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007006249623373151,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.044325086288154125,
|
|
"eval_signal/format_reward/group_std_mean": 0.10761458054184914,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.4722222288449605,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022162543144077063,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.022162543144077063,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0023161165105799832,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004349419303859274,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8951456746047672e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8951456746047672e-05,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.23459876825412115,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.3416580508152644,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002932484649742643,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002932484649742643,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.23459876825412115,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3416580508152644,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002932484649742643,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002932484649742643,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.23459876825412115,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.3416580508152644,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002932484649742643,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002932484649742643,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.23459876825412115,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.3416580508152644,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002932484649742643,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002932484649742643,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.04435273508230845,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.06427487296362717,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0044352737022563815,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0044352737022563815,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.044325086288154125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.10761458054184914,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4722222288449605,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004432508799557884,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.004432508799557884,
|
|
"eval_steps_per_second": 0.028,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.281593778766545,
|
|
"calibration/batch_distribution_entropy": 0.9419061803603992,
|
|
"calibration/buffer_distribution_entropy": 0.9258640938636624,
|
|
"calibration/confidence_entropy": 0.49888536043213083,
|
|
"calibration/coverage@0%": 0.03011447680157368,
|
|
"calibration/coverage@1%": 0.03011447680157368,
|
|
"calibration/coverage@10%": 0.16408625099512206,
|
|
"calibration/coverage@15%": 0.21532953056501455,
|
|
"calibration/coverage@20%": 0.27314703514062655,
|
|
"calibration/coverage@25%": 0.4321146266417758,
|
|
"calibration/coverage@30%": 0.525243477016095,
|
|
"calibration/coverage@5%": 0.1145230789521113,
|
|
"calibration/ece": 0.13445261241099055,
|
|
"calibration/mean_confidence": 0.6313762938304984,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02786458333333335,
|
|
"completions/max_length": 3606.6,
|
|
"completions/max_terminated_length": 3606.6,
|
|
"completions/mean_length": 763.4105102539063,
|
|
"completions/mean_terminated_length": 785.3501831054688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 163.2,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 0.000371407950296998,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.0242,
|
|
"num_tokens": 229695874.0,
|
|
"reward": 0.9728497266769409,
|
|
"reward_std": 0.15300759375095369,
|
|
"rewards/accuracy_reward": 0.6702257037162781,
|
|
"rewards/batch_coverage_0": 0.2623316437005997,
|
|
"rewards/batch_coverage_1": 0.2623316437005997,
|
|
"rewards/batch_coverage_5": 0.3089905083179474,
|
|
"rewards/brier_reward": 0.7880232572555542,
|
|
"rewards/confidence_uniqueness_reward": 0.920028030872345,
|
|
"rewards/format_reward": 0.971874988079071,
|
|
"rewards/frontier_aurc_reward": -0.0013586697168648243,
|
|
"rewards/frontier_coverage_10": 0.013760669692419469,
|
|
"rewards/frontier_coverage_15": 0.013760669692419469,
|
|
"rewards/frontier_coverage_20": 0.013760669692419469,
|
|
"rewards/frontier_coverage_25": 0.013760669692419469,
|
|
"rewards/frontier_ece_reward": 0.014541861787438392,
|
|
"rewards/frontier_entropy_batch_reward": -0.3155164361000061,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1584689661860466,
|
|
"signal/accuracy_reward/group_std_mean": 0.21362460255622864,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0792344830930233,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0792344830930233,
|
|
"signal/advantage_abs_mean": 0.11144402623176575,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11144402623176575,
|
|
"signal/advantage_pre_scale_std": 0.18166714310646057,
|
|
"signal/advantage_std": 0.18166714310646057,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1618078112602234,
|
|
"signal/batch_coverage_0/group_std_mean": 0.21025415658950805,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002022597729228437,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002022597729228437,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1618078112602234,
|
|
"signal/batch_coverage_1/group_std_mean": 0.21025415658950805,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002022597729228437,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002022597729228437,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1763445556163788,
|
|
"signal/batch_coverage_5/group_std_mean": 0.22784494161605834,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0022043070290237664,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022043070290237664,
|
|
"signal/brier_reward/centered_abs_mean": 0.15077899992465973,
|
|
"signal/brier_reward/group_std_mean": 0.19481396973133086,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01507790107280016,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01507790107280016,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.052749036252498625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08414890766143798,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005274903681129217,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005274903681129217,
|
|
"signal/format_reward/centered_abs_mean": 0.04283854253590107,
|
|
"signal/format_reward/group_std_mean": 0.07259135991334915,
|
|
"signal/format_reward/group_zero_std_frac": 0.725000011920929,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.021419271267950533,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.021419271267950533,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015893207862973213,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026022979523986577,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9866510410793125e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9866510410793125e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1694171756505966,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22896546125411987,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021177146816626193,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021177146816626193,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1694171756505966,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22896546125411987,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021177146816626193,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021177146816626193,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1694171756505966,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22896546125411987,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021177146816626193,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021177146816626193,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1694171756505966,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22896546125411987,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021177146816626193,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021177146816626193,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03752191811800003,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05395606607198715,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037521916907280683,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037521916907280683,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.336643385887146,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4062102913856506,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03366433903574943,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03366433903574943,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17717361977554963,
|
|
"calibration/batch_distribution_entropy": 0.9633449845534061,
|
|
"calibration/buffer_distribution_entropy": 0.9276999553327258,
|
|
"calibration/confidence_entropy": 0.4937159751623333,
|
|
"calibration/coverage@0%": 0.027874875235493347,
|
|
"calibration/coverage@1%": 0.027874875235493347,
|
|
"calibration/coverage@10%": 0.26310936161649656,
|
|
"calibration/coverage@15%": 0.40470202499875485,
|
|
"calibration/coverage@20%": 0.6669962991984029,
|
|
"calibration/coverage@25%": 0.8043059996369781,
|
|
"calibration/coverage@30%": 0.9216598483684952,
|
|
"calibration/coverage@5%": 0.14365961058684623,
|
|
"calibration/ece": 0.1422479489315213,
|
|
"calibration/mean_confidence": 0.5891823306356954,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020920138888888884,
|
|
"completions/max_length": 3806.8,
|
|
"completions/max_terminated_length": 3806.8,
|
|
"completions/mean_length": 787.9717163085937,
|
|
"completions/mean_terminated_length": 804.8591552734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 187.4,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.0003503327025100589,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.0169,
|
|
"num_tokens": 241881756.0,
|
|
"reward": 0.997657060623169,
|
|
"reward_std": 0.1417122393846512,
|
|
"rewards/accuracy_reward": 0.7091145753860474,
|
|
"rewards/batch_coverage_0": 0.2294553130865097,
|
|
"rewards/batch_coverage_1": 0.2294553130865097,
|
|
"rewards/batch_coverage_5": 0.2917938411235809,
|
|
"rewards/brier_reward": 0.775479543209076,
|
|
"rewards/confidence_uniqueness_reward": 0.9305724382400513,
|
|
"rewards/format_reward": 0.9789930701255798,
|
|
"rewards/frontier_aurc_reward": -0.0011566867819055916,
|
|
"rewards/frontier_coverage_10": -0.02423563809716143,
|
|
"rewards/frontier_coverage_15": -0.02423563809716143,
|
|
"rewards/frontier_coverage_20": -0.02423563809716143,
|
|
"rewards/frontier_coverage_25": -0.02423563809716143,
|
|
"rewards/frontier_ece_reward": 0.006674250774085522,
|
|
"rewards/frontier_entropy_batch_reward": -0.25826927125453947,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15519205927848817,
|
|
"signal/accuracy_reward/group_std_mean": 0.21122410893440247,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444537162781,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07759602963924409,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07759602963924409,
|
|
"signal/advantage_abs_mean": 0.103163842856884,
|
|
"signal/advantage_pre_scale_abs_mean": 0.103163842856884,
|
|
"signal/advantage_pre_scale_std": 0.16863756477832795,
|
|
"signal/advantage_std": 0.16863756477832795,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15613795518875123,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20137605369091033,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0019517245469614863,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019517245469614863,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15613795518875123,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20137605369091033,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0019517245469614863,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019517245469614863,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.173554927110672,
|
|
"signal/batch_coverage_5/group_std_mean": 0.22366454005241393,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0021694366820156573,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021694366820156573,
|
|
"signal/brier_reward/centered_abs_mean": 0.14830400347709655,
|
|
"signal/brier_reward/group_std_mean": 0.19163168966770172,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014830400981009007,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014830400981009007,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04105582907795906,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06620915308594703,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004105583066120744,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004105583066120744,
|
|
"signal/format_reward/centered_abs_mean": 0.03132595494389534,
|
|
"signal/format_reward/group_std_mean": 0.054777097702026364,
|
|
"signal/format_reward/group_zero_std_frac": 0.7861111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01566297747194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01566297747194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012045591603964568,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002010233420878649,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5056990014272742e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5056990014272742e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18861688375473024,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2493163228034973,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023577109910547732,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023577109910547732,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18861688375473024,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2493163228034973,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023577109910547732,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023577109910547732,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18861688375473024,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2493163228034973,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023577109910547732,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023577109910547732,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18861688375473024,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2493163228034973,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023577109910547732,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023577109910547732,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03666195943951607,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05227618217468262,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036661958321928976,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036661958321928976,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31750316023826597,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3886495649814606,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03175031580030918,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03175031580030918,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26235029694400336,
|
|
"calibration/batch_distribution_entropy": 0.975106372566205,
|
|
"calibration/buffer_distribution_entropy": 0.9322215986959075,
|
|
"calibration/confidence_entropy": 0.505391946596717,
|
|
"calibration/coverage@0%": 0.028582030034235918,
|
|
"calibration/coverage@1%": 0.028582030034235918,
|
|
"calibration/coverage@10%": 0.20571195144724558,
|
|
"calibration/coverage@15%": 0.3541822284469343,
|
|
"calibration/coverage@20%": 0.44006399782135075,
|
|
"calibration/coverage@25%": 0.5695625194522254,
|
|
"calibration/coverage@30%": 0.6569358854652971,
|
|
"calibration/coverage@5%": 0.0781587496109555,
|
|
"calibration/ece": 0.17193816815797208,
|
|
"calibration/mean_confidence": 0.5251815368338295,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02864583333333335,
|
|
"completions/max_length": 3921.2,
|
|
"completions/max_terminated_length": 3921.2,
|
|
"completions/mean_length": 774.7497436523438,
|
|
"completions/mean_terminated_length": 797.7675659179688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 193.0,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 0.00033246027305722237,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": -0.0231,
|
|
"num_tokens": 253886073.0,
|
|
"reward": 0.9680049657821655,
|
|
"reward_std": 0.15525253415107726,
|
|
"rewards/accuracy_reward": 0.6572048664093018,
|
|
"rewards/batch_coverage_0": 0.24308712184429168,
|
|
"rewards/batch_coverage_1": 0.24308712184429168,
|
|
"rewards/batch_coverage_5": 0.28743329644203186,
|
|
"rewards/brier_reward": 0.7592343688011169,
|
|
"rewards/confidence_uniqueness_reward": 0.9235189318656921,
|
|
"rewards/format_reward": 0.9712673544883728,
|
|
"rewards/frontier_aurc_reward": -0.0014267943566665053,
|
|
"rewards/frontier_coverage_10": 8.734003640711307e-05,
|
|
"rewards/frontier_coverage_15": 8.734003640711307e-05,
|
|
"rewards/frontier_coverage_20": 8.734003640711307e-05,
|
|
"rewards/frontier_coverage_25": 8.734003640711307e-05,
|
|
"rewards/frontier_ece_reward": 0.007573700416833163,
|
|
"rewards/frontier_entropy_batch_reward": -0.24920465648174286,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.172509765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.22167879045009614,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0862548828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0862548828125,
|
|
"signal/advantage_abs_mean": 0.11575937569141388,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11575937569141388,
|
|
"signal/advantage_pre_scale_std": 0.1854647934436798,
|
|
"signal/advantage_std": 0.1854647934436798,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.16924710273742677,
|
|
"signal/batch_coverage_0/group_std_mean": 0.21742305159568787,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0021155887749046086,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021155887749046086,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.16924710273742677,
|
|
"signal/batch_coverage_1/group_std_mean": 0.21742305159568787,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0021155887749046086,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021155887749046086,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18384623527526855,
|
|
"signal/batch_coverage_5/group_std_mean": 0.23590743243694307,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0022980780340731144,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022980780340731144,
|
|
"signal/brier_reward/centered_abs_mean": 0.1648420959711075,
|
|
"signal/brier_reward/group_std_mean": 0.2085964173078537,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016484210267663002,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016484210267663002,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05172303095459938,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08182380646467209,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005172303132712841,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005172303132712841,
|
|
"signal/format_reward/centered_abs_mean": 0.043212890625,
|
|
"signal/format_reward/group_std_mean": 0.0718956746160984,
|
|
"signal/format_reward/group_zero_std_frac": 0.7388889074325562,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0216064453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0216064453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014172559836879372,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022306596394628285,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7715699868858792e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7715699868858792e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2051616817712784,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2668293207883835,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025645211804658174,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025645211804658174,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2051616817712784,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2668293207883835,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025645211804658174,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025645211804658174,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2051616817712784,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2668293207883835,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025645211804658174,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025645211804658174,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2051616817712784,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2668293207883835,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025645211804658174,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025645211804658174,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.037424100935459136,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05262390598654747,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037424102891236545,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037424102891236545,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32366191744804385,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39591963291168214,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0323661919683218,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0323661919683218,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2859298539956019,
|
|
"calibration/batch_distribution_entropy": 0.964669531470023,
|
|
"calibration/buffer_distribution_entropy": 0.9365811707402238,
|
|
"calibration/confidence_entropy": 0.4723151134638047,
|
|
"calibration/coverage@0%": 0.06291055794403204,
|
|
"calibration/coverage@1%": 0.10060689302256605,
|
|
"calibration/coverage@10%": 0.22741400899740186,
|
|
"calibration/coverage@15%": 0.42181875974578886,
|
|
"calibration/coverage@20%": 0.5049175996187132,
|
|
"calibration/coverage@25%": 0.5344495171716461,
|
|
"calibration/coverage@30%": 0.5665255653336303,
|
|
"calibration/coverage@5%": 0.15599734117344027,
|
|
"calibration/ece": 0.19337395588240786,
|
|
"calibration/mean_confidence": 0.5832871356087996,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.022395833333333327,
|
|
"completions/max_length": 3798.8,
|
|
"completions/max_terminated_length": 3798.8,
|
|
"completions/mean_length": 772.4003662109375,
|
|
"completions/mean_terminated_length": 790.0671020507813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 216.8,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 0.0003116682346444577,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.0203,
|
|
"num_tokens": 265865981.0,
|
|
"reward": 0.9831430077552795,
|
|
"reward_std": 0.14407259225845337,
|
|
"rewards/accuracy_reward": 0.6763020992279053,
|
|
"rewards/batch_coverage_0": 0.2774762749671936,
|
|
"rewards/batch_coverage_1": 0.2774762749671936,
|
|
"rewards/batch_coverage_5": 0.31537898182868956,
|
|
"rewards/brier_reward": 0.7778678417205811,
|
|
"rewards/confidence_uniqueness_reward": 0.9281416535377502,
|
|
"rewards/format_reward": 0.9776041746139527,
|
|
"rewards/frontier_aurc_reward": -0.0014987385016866027,
|
|
"rewards/frontier_coverage_10": 0.006839688867330551,
|
|
"rewards/frontier_coverage_15": 0.006839688867330551,
|
|
"rewards/frontier_coverage_20": 0.006839688867330551,
|
|
"rewards/frontier_coverage_25": 0.006839688867330551,
|
|
"rewards/frontier_ece_reward": 0.01136172078549862,
|
|
"rewards/frontier_entropy_batch_reward": -0.26749643981456755,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15881618708372117,
|
|
"signal/accuracy_reward/group_std_mean": 0.20964906513690948,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4055555522441864,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07940809354186058,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07940809354186058,
|
|
"signal/advantage_abs_mean": 0.10615242719650268,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10615242719650268,
|
|
"signal/advantage_pre_scale_std": 0.17463763952255248,
|
|
"signal/advantage_std": 0.17463763952255248,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.168888920545578,
|
|
"signal/batch_coverage_0/group_std_mean": 0.21441494226455687,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002111111581325531,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002111111581325531,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.168888920545578,
|
|
"signal/batch_coverage_1/group_std_mean": 0.21441494226455687,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002111111581325531,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002111111581325531,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18052759468555452,
|
|
"signal/batch_coverage_5/group_std_mean": 0.22870538234710694,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0022565949242562056,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022565949242562056,
|
|
"signal/brier_reward/centered_abs_mean": 0.15385238826274872,
|
|
"signal/brier_reward/group_std_mean": 0.19617920517921447,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015385238453745842,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015385238453745842,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04555445536971092,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07177185565233231,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004555445723235607,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004555445723235607,
|
|
"signal/format_reward/centered_abs_mean": 0.03475477397441864,
|
|
"signal/format_reward/group_std_mean": 0.05912101566791535,
|
|
"signal/format_reward/group_zero_std_frac": 0.7777777791023255,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01737738698720932,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01737738698720932,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016686212038621306,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002557133138179779,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0857765048276632e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0857765048276632e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1900465279817581,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2485613316297531,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023755817208439113,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023755817208439113,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1900465279817581,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2485613316297531,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023755817208439113,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023755817208439113,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1900465279817581,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2485613316297531,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023755817208439113,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023755817208439113,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1900465279817581,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2485613316297531,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023755817208439113,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023755817208439113,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03919210657477379,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05463382974267006,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003919210657477379,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003919210657477379,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31622728109359743,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38765319585800173,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03162272982299328,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03162272982299328,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14147644180790817,
|
|
"calibration/batch_distribution_entropy": 0.9426057176678698,
|
|
"calibration/buffer_distribution_entropy": 0.9389522821864695,
|
|
"calibration/confidence_entropy": 0.4855506473529016,
|
|
"calibration/coverage@0%": 0.09941287508734994,
|
|
"calibration/coverage@1%": 0.17076704175401664,
|
|
"calibration/coverage@10%": 0.4188850192373753,
|
|
"calibration/coverage@15%": 0.5119985032926558,
|
|
"calibration/coverage@20%": 0.6163114807870812,
|
|
"calibration/coverage@25%": 0.8842963375878737,
|
|
"calibration/coverage@30%": 0.9654351221749341,
|
|
"calibration/coverage@5%": 0.30129620006256824,
|
|
"calibration/ece": 0.15946385050666856,
|
|
"calibration/mean_confidence": 0.6036409325496523,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02230902777777779,
|
|
"completions/max_length": 3313.6,
|
|
"completions/max_terminated_length": 3313.6,
|
|
"completions/mean_length": 776.3560913085937,
|
|
"completions/mean_terminated_length": 794.23583984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 219.2,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 0.0003594306472223252,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0201,
|
|
"num_tokens": 277927267.0,
|
|
"reward": 0.986296546459198,
|
|
"reward_std": 0.146352618932724,
|
|
"rewards/accuracy_reward": 0.6840277671813965,
|
|
"rewards/batch_coverage_0": 0.2731864511966705,
|
|
"rewards/batch_coverage_1": 0.2731864511966705,
|
|
"rewards/batch_coverage_5": 0.30946345925331115,
|
|
"rewards/brier_reward": 0.7864047050476074,
|
|
"rewards/confidence_uniqueness_reward": 0.928028118610382,
|
|
"rewards/format_reward": 0.9776909589767456,
|
|
"rewards/frontier_aurc_reward": -0.0011092747794464231,
|
|
"rewards/frontier_coverage_10": 0.0024573837639763952,
|
|
"rewards/frontier_coverage_15": 0.0024573837639763952,
|
|
"rewards/frontier_coverage_20": 0.0024573837639763952,
|
|
"rewards/frontier_coverage_25": 0.0024573837639763952,
|
|
"rewards/frontier_ece_reward": 0.009396951459348202,
|
|
"rewards/frontier_entropy_batch_reward": -0.27752792537212373,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16611327826976777,
|
|
"signal/accuracy_reward/group_std_mean": 0.21805827915668488,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08305663913488388,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08305663913488388,
|
|
"signal/advantage_abs_mean": 0.10857584476470947,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10857584476470947,
|
|
"signal/advantage_pre_scale_std": 0.17497023046016694,
|
|
"signal/advantage_std": 0.17497023046016694,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.16775573492050172,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2152125746011734,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0020969467237591743,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020969467237591743,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.16775573492050172,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2152125746011734,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0020969467237591743,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020969467237591743,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.17840272486209868,
|
|
"signal/batch_coverage_5/group_std_mean": 0.22756563127040863,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0022300342097878456,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022300342097878456,
|
|
"signal/brier_reward/centered_abs_mean": 0.15170427560806274,
|
|
"signal/brier_reward/group_std_mean": 0.19522441625595094,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015170427970588207,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015170427970588207,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04475205019116402,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07207442000508309,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004475205158814788,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004475205158814788,
|
|
"signal/format_reward/centered_abs_mean": 0.03493381030857563,
|
|
"signal/format_reward/group_std_mean": 0.06061212494969368,
|
|
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017466905154287816,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017466905154287816,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001282674679532647,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002113639307208359,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6033433348638936e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6033433348638936e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1927446722984314,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2527609497308731,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024093084037303926,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024093084037303926,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1927446722984314,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2527609497308731,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024093084037303926,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024093084037303926,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1927446722984314,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2527609497308731,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024093084037303926,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024093084037303926,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1927446722984314,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2527609497308731,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024093084037303926,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024093084037303926,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0355650432407856,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.049483828246593475,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003556504426524043,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003556504426524043,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32876399755477903,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39765849709510803,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03287639990448952,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03287639990448952,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23245300056345525,
|
|
"calibration/batch_distribution_entropy": 0.9669630991639009,
|
|
"calibration/buffer_distribution_entropy": 0.9404171564423501,
|
|
"calibration/confidence_entropy": 0.4814706318193302,
|
|
"calibration/coverage@0%": 0.011621216057397759,
|
|
"calibration/coverage@1%": 0.011621216057397759,
|
|
"calibration/coverage@10%": 0.1754077233201808,
|
|
"calibration/coverage@15%": 0.283362538629445,
|
|
"calibration/coverage@20%": 0.4702102688267346,
|
|
"calibration/coverage@25%": 0.6906058145005944,
|
|
"calibration/coverage@30%": 0.8511112267967078,
|
|
"calibration/coverage@5%": 0.06958466253259357,
|
|
"calibration/ece": 0.13568314624392283,
|
|
"calibration/mean_confidence": 0.5655281646008967,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.025694444444444464,
|
|
"completions/max_length": 3650.6,
|
|
"completions/max_terminated_length": 3650.6,
|
|
"completions/mean_length": 815.7385498046875,
|
|
"completions/mean_terminated_length": 837.46806640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 196.4,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 0.00037133650039322674,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.02,
|
|
"num_tokens": 290449375.0,
|
|
"reward": 0.9702214956283569,
|
|
"reward_std": 0.15028507709503175,
|
|
"rewards/accuracy_reward": 0.65625,
|
|
"rewards/batch_coverage_0": 0.2516955316066742,
|
|
"rewards/batch_coverage_1": 0.2516955316066742,
|
|
"rewards/batch_coverage_5": 0.27999492585659025,
|
|
"rewards/brier_reward": 0.7730745911598206,
|
|
"rewards/confidence_uniqueness_reward": 0.9261258721351624,
|
|
"rewards/format_reward": 0.9743055582046509,
|
|
"rewards/frontier_aurc_reward": -0.00132467788644135,
|
|
"rewards/frontier_coverage_10": 0.01222042804583907,
|
|
"rewards/frontier_coverage_15": 0.01222042804583907,
|
|
"rewards/frontier_coverage_20": 0.01222042804583907,
|
|
"rewards/frontier_coverage_25": 0.01222042804583907,
|
|
"rewards/frontier_ece_reward": 0.008971604146063327,
|
|
"rewards/frontier_entropy_batch_reward": -0.262603098154068,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17313368022441863,
|
|
"signal/accuracy_reward/group_std_mean": 0.22794292867183685,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08656684011220932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08656684011220932,
|
|
"signal/advantage_abs_mean": 0.11171528100967407,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11171528100967407,
|
|
"signal/advantage_pre_scale_std": 0.17555816173553468,
|
|
"signal/advantage_std": 0.17555816173553468,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.17122559249401093,
|
|
"signal/batch_coverage_0/group_std_mean": 0.22093523442745208,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002140319952741265,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002140319952741265,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.17122559249401093,
|
|
"signal/batch_coverage_1/group_std_mean": 0.22093523442745208,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002140319952741265,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002140319952741265,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1771954357624054,
|
|
"signal/batch_coverage_5/group_std_mean": 0.22708921730518342,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002214943151921034,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002214943151921034,
|
|
"signal/brier_reward/centered_abs_mean": 0.15775817036628723,
|
|
"signal/brier_reward/group_std_mean": 0.20173568725585939,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015775817446410656,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015775817446410656,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04499040432274341,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07010399773716927,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0044990403577685354,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0044990403577685354,
|
|
"signal/format_reward/centered_abs_mean": 0.035742187313735484,
|
|
"signal/format_reward/group_std_mean": 0.0591585423797369,
|
|
"signal/format_reward/group_zero_std_frac": 0.7805555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017871093656867742,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017871093656867742,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013749219011515378,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022185094188898803,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.71865238371538e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.71865238371538e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20297004878520966,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.266823947429657,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025371257215738297,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025371257215738297,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20297004878520966,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.266823947429657,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025371257215738297,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025371257215738297,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20297004878520966,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.266823947429657,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025371257215738297,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025371257215738297,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20297004878520966,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.266823947429657,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025371257215738297,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025371257215738297,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03541369363665581,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04960825145244598,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003541369317099452,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003541369317099452,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3235150218009949,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3940378069877625,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03235150426626206,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03235150426626206,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20362636551810637,
|
|
"calibration/batch_distribution_entropy": 0.9532780570989763,
|
|
"calibration/buffer_distribution_entropy": 0.9427605212339678,
|
|
"calibration/confidence_entropy": 0.4754555021484623,
|
|
"calibration/coverage@0%": 0.08711736565368992,
|
|
"calibration/coverage@1%": 0.09701319898702328,
|
|
"calibration/coverage@10%": 0.3242640550710837,
|
|
"calibration/coverage@15%": 0.3851703314572362,
|
|
"calibration/coverage@20%": 0.45349569030891085,
|
|
"calibration/coverage@25%": 0.6403679554867728,
|
|
"calibration/coverage@30%": 0.8033515007141947,
|
|
"calibration/coverage@5%": 0.27292674622409996,
|
|
"calibration/ece": 0.155295486974307,
|
|
"calibration/mean_confidence": 0.5909133218280592,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018229166666666675,
|
|
"completions/max_length": 3749.2,
|
|
"completions/max_terminated_length": 3749.2,
|
|
"completions/mean_length": 801.0372314453125,
|
|
"completions/mean_terminated_length": 815.9605346679688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 186.8,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 0.00043759445543400943,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.016,
|
|
"num_tokens": 302770348.0,
|
|
"reward": 0.9900829911231994,
|
|
"reward_std": 0.14265244007110595,
|
|
"rewards/accuracy_reward": 0.6833333253860474,
|
|
"rewards/batch_coverage_0": 0.2686183601617813,
|
|
"rewards/batch_coverage_1": 0.2686183601617813,
|
|
"rewards/batch_coverage_5": 0.31662521958351136,
|
|
"rewards/brier_reward": 0.7835352897644043,
|
|
"rewards/confidence_uniqueness_reward": 0.9331841111183167,
|
|
"rewards/format_reward": 0.9817708253860473,
|
|
"rewards/frontier_aurc_reward": -0.0011468948214314878,
|
|
"rewards/frontier_coverage_10": 0.00698006252059713,
|
|
"rewards/frontier_coverage_15": 0.00698006252059713,
|
|
"rewards/frontier_coverage_20": 0.00698006252059713,
|
|
"rewards/frontier_coverage_25": 0.00698006252059713,
|
|
"rewards/frontier_ece_reward": 0.008575173746794462,
|
|
"rewards/frontier_entropy_batch_reward": -0.26006550490856173,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1631293386220932,
|
|
"signal/accuracy_reward/group_std_mean": 0.21489474475383757,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38888888955116274,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0815646693110466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0815646693110466,
|
|
"signal/advantage_abs_mean": 0.1027984693646431,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1027984693646431,
|
|
"signal/advantage_pre_scale_std": 0.16858279407024385,
|
|
"signal/advantage_std": 0.16858279407024385,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.16986936032772065,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2156634271144867,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0021233670879155396,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021233670879155396,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.16986936032772065,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2156634271144867,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0021233670879155396,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021233670879155396,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18553533852100373,
|
|
"signal/batch_coverage_5/group_std_mean": 0.23565352261066436,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002319191861897707,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002319191861897707,
|
|
"signal/brier_reward/centered_abs_mean": 0.15136203169822693,
|
|
"signal/brier_reward/group_std_mean": 0.19603108763694763,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015136203169822693,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015136203169822693,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.041097406297922134,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07088088095188141,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0041097409557551146,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041097409557551146,
|
|
"signal/format_reward/centered_abs_mean": 0.03191189244389534,
|
|
"signal/format_reward/group_std_mean": 0.060290227830410006,
|
|
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01595594622194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01595594622194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011837596539407969,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0018570037558674813,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4796995856158901e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4796995856158901e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2052822709083557,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26847070157527925,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025660285726189615,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025660285726189615,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2052822709083557,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26847070157527925,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025660285726189615,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025660285726189615,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2052822709083557,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26847070157527925,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025660285726189615,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025660285726189615,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2052822709083557,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26847070157527925,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025660285726189615,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025660285726189615,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.035533937811851504,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.048804853856563565,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035533939488232138,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035533939488232138,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3181172013282776,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3875519514083862,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031811722368001935,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031811722368001935,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.11694179421150186,
|
|
"calibration/batch_distribution_entropy": 0.9511068660379134,
|
|
"calibration/buffer_distribution_entropy": 0.9470224601130337,
|
|
"calibration/confidence_entropy": 0.46792154137151637,
|
|
"calibration/coverage@0%": 0.04433651789792815,
|
|
"calibration/coverage@1%": 0.04433651789792815,
|
|
"calibration/coverage@10%": 0.5450407372317464,
|
|
"calibration/coverage@15%": 0.7070475818609679,
|
|
"calibration/coverage@20%": 0.8278544349299921,
|
|
"calibration/coverage@25%": 0.9070786764494283,
|
|
"calibration/coverage@30%": 0.9888614640718003,
|
|
"calibration/coverage@5%": 0.24589149238172475,
|
|
"calibration/ece": 0.12410503563540456,
|
|
"calibration/mean_confidence": 0.5983168562659058,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01866319444444444,
|
|
"completions/max_length": 3657.0,
|
|
"completions/max_terminated_length": 3657.0,
|
|
"completions/mean_length": 808.0329956054687,
|
|
"completions/mean_terminated_length": 823.36103515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 207.2,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 0.00034611712908372283,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0147,
|
|
"num_tokens": 315183112.0,
|
|
"reward": 0.9862439870834351,
|
|
"reward_std": 0.13900390565395354,
|
|
"rewards/accuracy_reward": 0.6765625,
|
|
"rewards/batch_coverage_0": 0.28147890865802766,
|
|
"rewards/batch_coverage_1": 0.28147890865802766,
|
|
"rewards/batch_coverage_5": 0.3368524968624115,
|
|
"rewards/brier_reward": 0.7892671346664428,
|
|
"rewards/confidence_uniqueness_reward": 0.9314653038978576,
|
|
"rewards/format_reward": 0.98125,
|
|
"rewards/frontier_aurc_reward": -0.001107062050141394,
|
|
"rewards/frontier_coverage_10": 0.01817864943295717,
|
|
"rewards/frontier_coverage_15": 0.01817864943295717,
|
|
"rewards/frontier_coverage_20": 0.01817864943295717,
|
|
"rewards/frontier_coverage_25": 0.01817864943295717,
|
|
"rewards/frontier_ece_reward": 0.010302421916276217,
|
|
"rewards/frontier_entropy_batch_reward": -0.2790845036506653,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15387369990348815,
|
|
"signal/accuracy_reward/group_std_mean": 0.21047138273715973,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37222222685813905,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07693684995174407,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07693684995174407,
|
|
"signal/advantage_abs_mean": 0.10007860809564591,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10007860809564591,
|
|
"signal/advantage_pre_scale_std": 0.163449290394783,
|
|
"signal/advantage_std": 0.163449290394783,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.16872317790985109,
|
|
"signal/batch_coverage_0/group_std_mean": 0.21727250516414642,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.00210903980769217,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.00210903980769217,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.16872317790985109,
|
|
"signal/batch_coverage_1/group_std_mean": 0.21727250516414642,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.00210903980769217,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.00210903980769217,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18378091156482695,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2361411601305008,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002297261357307434,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002297261357307434,
|
|
"signal/brier_reward/centered_abs_mean": 0.14938353896141052,
|
|
"signal/brier_reward/group_std_mean": 0.1946202725172043,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01493835374712944,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01493835374712944,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03957700990140438,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06665479466319084,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003957701055333019,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003957701055333019,
|
|
"signal/format_reward/centered_abs_mean": 0.029296875558793546,
|
|
"signal/format_reward/group_std_mean": 0.0548139251768589,
|
|
"signal/format_reward/group_zero_std_frac": 0.775,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014648437779396773,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.014648437779396773,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013288468355312944,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022456780076026916,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6610585225862452e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6610585225862452e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1944827049970627,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25859071016311647,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024310338776558638,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024310338776558638,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1944827049970627,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25859071016311647,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024310338776558638,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024310338776558638,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1944827049970627,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.25859071016311647,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024310338776558638,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024310338776558638,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1944827049970627,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25859071016311647,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024310338776558638,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024310338776558638,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03382277488708496,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.047228545695543286,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033822776284068825,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033822776284068825,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32691980004310606,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3963987946510315,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03269198089838028,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03269198089838028,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1703057850192284,
|
|
"calibration/batch_distribution_entropy": 0.9776350932562503,
|
|
"calibration/buffer_distribution_entropy": 0.9576303402245949,
|
|
"calibration/confidence_entropy": 0.4619619640038877,
|
|
"calibration/coverage@0%": 0.017471368372128496,
|
|
"calibration/coverage@1%": 0.017471368372128496,
|
|
"calibration/coverage@10%": 0.37715968804627453,
|
|
"calibration/coverage@15%": 0.5172201213462484,
|
|
"calibration/coverage@20%": 0.6727254175519178,
|
|
"calibration/coverage@25%": 0.7804807466015112,
|
|
"calibration/coverage@30%": 0.8431443731606982,
|
|
"calibration/coverage@5%": 0.1628647147375088,
|
|
"calibration/ece": 0.15486527377423648,
|
|
"calibration/mean_confidence": 0.5378641362559275,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014843749999999978,
|
|
"completions/max_length": 3701.8,
|
|
"completions/max_terminated_length": 3701.8,
|
|
"completions/mean_length": 793.9427124023438,
|
|
"completions/mean_terminated_length": 805.977294921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 229.4,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 0.00032631229260005057,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0131,
|
|
"num_tokens": 327393940.0,
|
|
"reward": 1.002014195919037,
|
|
"reward_std": 0.13000169694423674,
|
|
"rewards/accuracy_reward": 0.7035590410232544,
|
|
"rewards/batch_coverage_0": 0.30727551579475404,
|
|
"rewards/batch_coverage_1": 0.30727551579475404,
|
|
"rewards/batch_coverage_5": 0.35222265124320984,
|
|
"rewards/brier_reward": 0.8031177639961242,
|
|
"rewards/confidence_uniqueness_reward": 0.9338420629501343,
|
|
"rewards/format_reward": 0.9850694298744201,
|
|
"rewards/frontier_aurc_reward": -0.0011088799685239792,
|
|
"rewards/frontier_coverage_10": 0.016308632807340472,
|
|
"rewards/frontier_coverage_15": 0.016308632807340472,
|
|
"rewards/frontier_coverage_20": 0.016308632807340472,
|
|
"rewards/frontier_coverage_25": 0.016214244009461254,
|
|
"rewards/frontier_ece_reward": 0.00980039369314909,
|
|
"rewards/frontier_entropy_batch_reward": -0.29861122369766235,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14319118857383728,
|
|
"signal/accuracy_reward/group_std_mean": 0.19803574681282043,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07159559428691864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07159559428691864,
|
|
"signal/advantage_abs_mean": 0.09256956726312637,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09256956726312637,
|
|
"signal/advantage_pre_scale_std": 0.15642723441123962,
|
|
"signal/advantage_std": 0.15642723441123962,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1624282956123352,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20890582203865052,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0020303537137806417,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0020303537137806417,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1624282956123352,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20890582203865052,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0020303537137806417,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0020303537137806417,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.17466669380664826,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2239888697862625,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0021833337377756834,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021833337377756834,
|
|
"signal/brier_reward/centered_abs_mean": 0.14250083565711974,
|
|
"signal/brier_reward/group_std_mean": 0.18544132113456727,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014250084944069385,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014250084944069385,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03743334673345089,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06275248900055885,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003743334859609604,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003743334859609604,
|
|
"signal/format_reward/centered_abs_mean": 0.02568359375,
|
|
"signal/format_reward/group_std_mean": 0.04919163584709167,
|
|
"signal/format_reward/group_zero_std_frac": 0.794444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014643748523667454,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024587402120232584,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8304685363546012e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8304685363546012e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18562319576740266,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.247874915599823,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023202899377793075,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023202899377793075,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18562319576740266,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.247874915599823,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023202899377793075,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023202899377793075,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18562319576740266,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.247874915599823,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023202899377793075,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023202899377793075,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17168098390102388,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22989341318607331,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002146012266166508,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002146012266166508,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03160376250743866,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.043816656619310376,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031603761482983826,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031603761482983826,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33895700573921206,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4089750647544861,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033895700424909594,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033895700424909594,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15447080925497395,
|
|
"calibration/batch_distribution_entropy": 0.9639245069307363,
|
|
"calibration/buffer_distribution_entropy": 0.9672132224677978,
|
|
"calibration/confidence_entropy": 0.474934743527651,
|
|
"calibration/coverage@0%": 0.04504318475566866,
|
|
"calibration/coverage@1%": 0.04504318475566866,
|
|
"calibration/coverage@10%": 0.4371306801956164,
|
|
"calibration/coverage@15%": 0.6027739344922879,
|
|
"calibration/coverage@20%": 0.6805436650832216,
|
|
"calibration/coverage@25%": 0.7423084778995974,
|
|
"calibration/coverage@30%": 0.8075061902409922,
|
|
"calibration/coverage@5%": 0.3228785773292098,
|
|
"calibration/ece": 0.1982343691224806,
|
|
"calibration/mean_confidence": 0.534430820711849,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013975694444444442,
|
|
"completions/max_length": 3749.6,
|
|
"completions/max_terminated_length": 3749.6,
|
|
"completions/mean_length": 882.13291015625,
|
|
"completions/mean_terminated_length": 894.5720825195312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 222.8,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 0.0003455007972661406,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.0109,
|
|
"num_tokens": 340666447.0,
|
|
"reward": 0.9967718958854676,
|
|
"reward_std": 0.13570687919855118,
|
|
"rewards/accuracy_reward": 0.6901041507720947,
|
|
"rewards/batch_coverage_0": 0.2922303080558777,
|
|
"rewards/batch_coverage_1": 0.2922303080558777,
|
|
"rewards/batch_coverage_5": 0.3320302963256836,
|
|
"rewards/brier_reward": 0.7988881587982177,
|
|
"rewards/confidence_uniqueness_reward": 0.935482382774353,
|
|
"rewards/format_reward": 0.9859375,
|
|
"rewards/frontier_aurc_reward": -0.0013168566045351326,
|
|
"rewards/frontier_coverage_10": 0.014929048530757428,
|
|
"rewards/frontier_coverage_15": 0.014929048530757428,
|
|
"rewards/frontier_coverage_20": 0.01817968818359077,
|
|
"rewards/frontier_coverage_25": 0.04145521372556686,
|
|
"rewards/frontier_ece_reward": 0.006141281011514366,
|
|
"rewards/frontier_entropy_batch_reward": -0.2785848528146744,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16552734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.21570127904415132,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.39166667461395266,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.082763671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.082763671875,
|
|
"signal/advantage_abs_mean": 0.10074692964553833,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10074692964553833,
|
|
"signal/advantage_pre_scale_std": 0.16051664352416992,
|
|
"signal/advantage_std": 0.16051664352416992,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1777266889810562,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2269949346780777,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.002221583598293364,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.002221583598293364,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1777266889810562,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2269949346780777,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.002221583598293364,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.002221583598293364,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.18860393166542053,
|
|
"signal/batch_coverage_5/group_std_mean": 0.24127667844295503,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0023575491271913052,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023575491271913052,
|
|
"signal/brier_reward/centered_abs_mean": 0.14226683378219604,
|
|
"signal/brier_reward/group_std_mean": 0.18378482460975648,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014226683229207993,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014226683229207993,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034204688295722006,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05667965784668923,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003420468932017684,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003420468932017684,
|
|
"signal/format_reward/centered_abs_mean": 0.02274305522441864,
|
|
"signal/format_reward/group_std_mean": 0.04305711500346661,
|
|
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01137152761220932,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01137152761220932,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001726186671294272,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002951366687193513,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1577333609457127e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1577333609457127e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19112130999565125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25123822391033174,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023890164215117694,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023890164215117694,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19112130999565125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25123822391033174,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023890164215117694,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023890164215117694,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17106458246707917,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2264966309070587,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002138307271525264,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002138307271525264,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07471532374620438,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09947905391454696,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009339415351860225,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009339415351860225,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02742462418973446,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0365887500345707,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027424625121057035,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027424625121057035,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32765315771102904,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3976347029209137,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032765316963195804,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032765316963195804,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 0.12898595314114938,
|
|
"eval_calibration/batch_distribution_entropy": 0.9285040644778836,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9722194296652075,
|
|
"eval_calibration/confidence_entropy": 0.5145721051071591,
|
|
"eval_calibration/coverage@0%": 0.23840725806451613,
|
|
"eval_calibration/coverage@1%": 0.23840725806451613,
|
|
"eval_calibration/coverage@10%": 0.4338037634408602,
|
|
"eval_calibration/coverage@15%": 0.7686491935483871,
|
|
"eval_calibration/coverage@20%": 0.8637432795698925,
|
|
"eval_calibration/coverage@25%": 0.9373319892473119,
|
|
"eval_calibration/coverage@30%": 0.9947916666666666,
|
|
"eval_calibration/coverage@5%": 0.2867943548387097,
|
|
"eval_calibration/ece": 0.23526666207644473,
|
|
"eval_calibration/mean_confidence": 0.5678511811520228,
|
|
"eval_completions/clipped_ratio": 0.01215277777777779,
|
|
"eval_completions/max_length": 2915.8333333333335,
|
|
"eval_completions/max_terminated_length": 2915.8333333333335,
|
|
"eval_completions/mean_length": 843.0679016113281,
|
|
"eval_completions/mean_terminated_length": 853.5554402669271,
|
|
"eval_completions/min_length": 61.5,
|
|
"eval_completions/min_terminated_length": 279.6666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 340666447.0,
|
|
"eval_reward": 0.9001721441745758,
|
|
"eval_reward_std": 0.23989658802747726,
|
|
"eval_rewards/accuracy_reward": 0.6710069477558136,
|
|
"eval_rewards/batch_coverage_0": 0.022330415435135365,
|
|
"eval_rewards/batch_coverage_1": 0.022330415435135365,
|
|
"eval_rewards/batch_coverage_5": 0.02772203693166375,
|
|
"eval_rewards/brier_reward": 0.7917285561561584,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8836350838343302,
|
|
"eval_rewards/format_reward": 0.9852430621782938,
|
|
"eval_rewards/frontier_aurc_reward": -0.0014331555964114766,
|
|
"eval_rewards/frontier_coverage_10": 0.023409837027429603,
|
|
"eval_rewards/frontier_coverage_15": 0.023409837027429603,
|
|
"eval_rewards/frontier_coverage_20": 0.026956443985303242,
|
|
"eval_rewards/frontier_coverage_25": 0.056417365868886314,
|
|
"eval_rewards/frontier_ece_reward": 0.005207664585517098,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9852430621782938,
|
|
"eval_runtime": 208.6557,
|
|
"eval_samples_per_second": 4.793,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4315863748391469,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4713868449131648,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21579318741957346,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21579318741957346,
|
|
"eval_signal/advantage_abs_mean": 0.20765997717777887,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20765997717777887,
|
|
"eval_signal/advantage_pre_scale_std": 0.23849675059318542,
|
|
"eval_signal/advantage_std": 0.23849675059318542,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.20339750746885935,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.28966983159383136,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0025424689520150423,
|
|
"eval_signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.0025424689520150423,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.20339750746885935,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.28966983159383136,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0025424689520150423,
|
|
"eval_signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.0025424689520150423,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.1966564084092776,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.2772370899717013,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002458205233172824,
|
|
"eval_signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.002458205233172824,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19103506952524185,
|
|
"eval_signal/brier_reward/group_std_mean": 0.24966617921988168,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01910350751131773,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01910350751131773,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05553654643396536,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09764633762339751,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005553654511459172,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005553654511459172,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.02826605907951792,
|
|
"eval_signal/format_reward/group_std_mean": 0.07450965698808432,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.6111111293236414,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.01413302953975896,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.01413302953975896,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0023859492309081056,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005178831284865737,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9824365507617284e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9824365507617284e-05,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.29903774956862134,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4070753405491511,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037379717299093804,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037379717299093804,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.29903774956862134,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4070753405491511,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037379717299093804,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037379717299093804,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.18226321786642075,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.26183462888002396,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022782902621353665,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022782902621353665,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08711665496230125,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.11587635427713394,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010889582335948944,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010889582335948944,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03477179010709127,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.04579935657481352,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003477179096080363,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003477179096080363,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.02826605907951792,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07450965698808432,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6111111293236414,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.002826606078694264,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002826606078694264,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1394387315388774,
|
|
"calibration/batch_distribution_entropy": 0.9788088149720707,
|
|
"calibration/buffer_distribution_entropy": 0.9747806855913861,
|
|
"calibration/confidence_entropy": 0.4963554955137409,
|
|
"calibration/coverage@0%": 0.01860834112423099,
|
|
"calibration/coverage@1%": 0.01860834112423099,
|
|
"calibration/coverage@10%": 0.5299976057724558,
|
|
"calibration/coverage@15%": 0.677735704619086,
|
|
"calibration/coverage@20%": 0.7812693942832146,
|
|
"calibration/coverage@25%": 0.8555657320957446,
|
|
"calibration/coverage@30%": 0.9199157929271526,
|
|
"calibration/coverage@5%": 0.1910101460964185,
|
|
"calibration/ece": 0.21350837129670439,
|
|
"calibration/mean_confidence": 0.5446313212061893,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013107638888888907,
|
|
"completions/max_length": 3513.4,
|
|
"completions/max_terminated_length": 3513.4,
|
|
"completions/mean_length": 827.4097290039062,
|
|
"completions/mean_terminated_length": 838.625732421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 237.2,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.0003739822132047266,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0094,
|
|
"num_tokens": 353305919.0,
|
|
"reward": 1.019690752029419,
|
|
"reward_std": 0.1350053295493126,
|
|
"rewards/accuracy_reward": 0.7385416746139526,
|
|
"rewards/batch_coverage_0": 0.26552567183971404,
|
|
"rewards/batch_coverage_1": 0.26552567183971404,
|
|
"rewards/batch_coverage_5": 0.3139073848724365,
|
|
"rewards/brier_reward": 0.7964405417442322,
|
|
"rewards/confidence_uniqueness_reward": 0.9369457364082336,
|
|
"rewards/format_reward": 0.9868923664093018,
|
|
"rewards/frontier_aurc_reward": -0.0009805824258364737,
|
|
"rewards/frontier_coverage_10": -0.019329096376895904,
|
|
"rewards/frontier_coverage_15": -0.016208803188055752,
|
|
"rewards/frontier_coverage_20": 0.013247845345176756,
|
|
"rewards/frontier_coverage_25": 0.08285010010004043,
|
|
"rewards/frontier_ece_reward": -0.00014433086871576962,
|
|
"rewards/frontier_entropy_batch_reward": -0.27657235562801363,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16686197817325593,
|
|
"signal/accuracy_reward/group_std_mean": 0.21733638942241668,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38888888359069823,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08343098908662797,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08343098908662797,
|
|
"signal/advantage_abs_mean": 0.10120506733655929,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10120506733655929,
|
|
"signal/advantage_pre_scale_std": 0.1630137413740158,
|
|
"signal/advantage_std": 0.1630137413740158,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1528965026140213,
|
|
"signal/batch_coverage_0/group_std_mean": 0.19621860086917878,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.001911206333898008,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001911206333898008,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1528965026140213,
|
|
"signal/batch_coverage_1/group_std_mean": 0.19621860086917878,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.001911206333898008,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001911206333898008,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.16557470858097076,
|
|
"signal/batch_coverage_5/group_std_mean": 0.21307723820209504,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0020696839783340693,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020696839783340693,
|
|
"signal/brier_reward/centered_abs_mean": 0.13954072892665864,
|
|
"signal/brier_reward/group_std_mean": 0.17854183316230773,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013954073563218117,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013954073563218117,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03350038155913353,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05401670783758163,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003350038221105933,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003350038221105933,
|
|
"signal/format_reward/centered_abs_mean": 0.022086588852107524,
|
|
"signal/format_reward/group_std_mean": 0.040450767055153845,
|
|
"signal/format_reward/group_zero_std_frac": 0.8361111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011043294426053762,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011043294426053762,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014425348956137896,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002580096502788365,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8031687068287285e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8031687068287285e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1961042582988739,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2569017678499222,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002451303321868181,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002451303321868181,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1862110137939453,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24439649879932404,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00232763784006238,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00232763784006238,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09592682868242264,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12885407060384751,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011990853352472186,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011990853352472186,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06761858016252517,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08734526634216308,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008452322450466454,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008452322450466454,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.025069399923086166,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03274794705212116,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025069401133805515,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025069401133805515,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33268231749534605,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4020723164081573,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03326823078095913,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03326823078095913,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12093013501895719,
|
|
"calibration/batch_distribution_entropy": 0.9465707409362046,
|
|
"calibration/buffer_distribution_entropy": 0.9797023876168109,
|
|
"calibration/confidence_entropy": 0.49057445467289995,
|
|
"calibration/coverage@0%": 0.14197996116913858,
|
|
"calibration/coverage@1%": 0.21736492447170228,
|
|
"calibration/coverage@10%": 0.6383462062363822,
|
|
"calibration/coverage@15%": 0.7202173228346458,
|
|
"calibration/coverage@20%": 0.7690792650918635,
|
|
"calibration/coverage@25%": 0.8610603674540682,
|
|
"calibration/coverage@30%": 0.9002666666666667,
|
|
"calibration/coverage@5%": 0.41920036616283207,
|
|
"calibration/ece": 0.1681717109765247,
|
|
"calibration/mean_confidence": 0.6029968433607082,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015885416666666673,
|
|
"completions/max_length": 3586.6,
|
|
"completions/max_terminated_length": 3586.6,
|
|
"completions/mean_length": 844.99375,
|
|
"completions/mean_terminated_length": 858.6648681640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 229.0,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 0.0003149795229546726,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0124,
|
|
"num_tokens": 366127543.0,
|
|
"reward": 0.9901588439941407,
|
|
"reward_std": 0.13654743432998656,
|
|
"rewards/accuracy_reward": 0.6759548664093018,
|
|
"rewards/batch_coverage_0": 0.2863477557897568,
|
|
"rewards/batch_coverage_1": 0.2863477557897568,
|
|
"rewards/batch_coverage_5": 0.33578619360923767,
|
|
"rewards/brier_reward": 0.7955960988998413,
|
|
"rewards/confidence_uniqueness_reward": 0.9340329527854919,
|
|
"rewards/format_reward": 0.9840277791023254,
|
|
"rewards/frontier_aurc_reward": -0.0017221482703462243,
|
|
"rewards/frontier_coverage_10": 0.02007829153444618,
|
|
"rewards/frontier_coverage_15": 0.021873852517455817,
|
|
"rewards/frontier_coverage_20": 0.03474405147135258,
|
|
"rewards/frontier_coverage_25": 0.1012751117348671,
|
|
"rewards/frontier_ece_reward": 0.0031474192626774313,
|
|
"rewards/frontier_entropy_batch_reward": -0.26669242084026334,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1532497823238373,
|
|
"signal/accuracy_reward/group_std_mean": 0.20331214964389802,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07662489116191865,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07662489116191865,
|
|
"signal/advantage_abs_mean": 0.09952675998210907,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09952675998210907,
|
|
"signal/advantage_pre_scale_std": 0.16293827295303345,
|
|
"signal/advantage_std": 0.16293827295303345,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15953900516033173,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2021380215883255,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0019942376995459197,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019942376995459197,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15953900516033173,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2021380215883255,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0019942376995459197,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019942376995459197,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1748807907104492,
|
|
"signal/batch_coverage_5/group_std_mean": 0.22231533825397493,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0021860099397599695,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021860099397599695,
|
|
"signal/brier_reward/centered_abs_mean": 0.13942358195781707,
|
|
"signal/brier_reward/group_std_mean": 0.18119371235370635,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013942358270287513,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013942358270287513,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03780218847095966,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06369221433997155,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003780218819156289,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003780218819156289,
|
|
"signal/format_reward/centered_abs_mean": 0.027105034328997135,
|
|
"signal/format_reward/group_std_mean": 0.05114769637584686,
|
|
"signal/format_reward/group_zero_std_frac": 0.7916666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013552517164498568,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013552517164498568,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019960318226367236,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003401953401044011,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.495039880159311e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.495039880159311e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18583748638629913,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24297050833702089,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023229687009006738,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023229687009006738,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14687398970127105,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19355247914791107,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018359248293563723,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018359248293563723,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06955377981066704,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09095239490270615,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008694222313351929,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008694222313351929,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08188401460647583,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10543016791343689,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010235501918941735,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010235501918941735,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.022962706908583642,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.029792382568120956,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022962707560509445,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022962707560509445,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31707857847213744,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38687134981155397,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03170785903930664,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03170785903930664,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12764000468111514,
|
|
"calibration/batch_distribution_entropy": 0.956394671554677,
|
|
"calibration/buffer_distribution_entropy": 0.9829512887475694,
|
|
"calibration/confidence_entropy": 0.46494209433669287,
|
|
"calibration/coverage@0%": 0.0482555215056701,
|
|
"calibration/coverage@1%": 0.14563772045854967,
|
|
"calibration/coverage@10%": 0.5811881577849134,
|
|
"calibration/coverage@15%": 0.6835331579367019,
|
|
"calibration/coverage@20%": 0.7459375903352157,
|
|
"calibration/coverage@25%": 0.83063107476058,
|
|
"calibration/coverage@30%": 0.9025417526070492,
|
|
"calibration/coverage@5%": 0.2723134211315214,
|
|
"calibration/ece": 0.17005445506692282,
|
|
"calibration/mean_confidence": 0.542795898585713,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017100694444444463,
|
|
"completions/max_length": 3833.2,
|
|
"completions/max_terminated_length": 3833.2,
|
|
"completions/mean_length": 888.80322265625,
|
|
"completions/mean_terminated_length": 904.3852905273437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 270.6,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 0.0003439970896579325,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.0134,
|
|
"num_tokens": 379505628.0,
|
|
"reward": 0.9866411328315735,
|
|
"reward_std": 0.1354316145181656,
|
|
"rewards/accuracy_reward": 0.665625,
|
|
"rewards/batch_coverage_0": 0.2924619853496552,
|
|
"rewards/batch_coverage_1": 0.2924619853496552,
|
|
"rewards/batch_coverage_5": 0.34387720823287965,
|
|
"rewards/brier_reward": 0.7988099932670594,
|
|
"rewards/confidence_uniqueness_reward": 0.9345163106918335,
|
|
"rewards/format_reward": 0.9828993082046509,
|
|
"rewards/frontier_aurc_reward": -0.0012383663910441101,
|
|
"rewards/frontier_coverage_10": 0.032382465153932574,
|
|
"rewards/frontier_coverage_15": 0.03611158281564712,
|
|
"rewards/frontier_coverage_20": 0.051848241686820985,
|
|
"rewards/frontier_coverage_25": 0.12219490557909012,
|
|
"rewards/frontier_ece_reward": 0.0037251237663440406,
|
|
"rewards/frontier_entropy_batch_reward": -0.2595239460468292,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1499891459941864,
|
|
"signal/accuracy_reward/group_std_mean": 0.19837667644023896,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0749945729970932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0749945729970932,
|
|
"signal/advantage_abs_mean": 0.09967254400253296,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09967254400253296,
|
|
"signal/advantage_pre_scale_std": 0.16272779703140258,
|
|
"signal/advantage_std": 0.16272779703140258,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15506300330162048,
|
|
"signal/batch_coverage_0/group_std_mean": 0.19931573271751404,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.001938287541270256,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001938287541270256,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15506300330162048,
|
|
"signal/batch_coverage_1/group_std_mean": 0.19931573271751404,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.001938287541270256,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001938287541270256,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1697798639535904,
|
|
"signal/batch_coverage_5/group_std_mean": 0.219083034992218,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0021222483832389117,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021222483832389117,
|
|
"signal/brier_reward/centered_abs_mean": 0.1341599702835083,
|
|
"signal/brier_reward/group_std_mean": 0.1754637211561203,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013415997102856635,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013415997102856635,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03774611875414848,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06073072776198387,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037746119778603315,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037746119778603315,
|
|
"signal/format_reward/centered_abs_mean": 0.02798936627805233,
|
|
"signal/format_reward/group_std_mean": 0.049074608087539676,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333253860474,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013994683139026165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013994683139026165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015422555385157465,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027716793585568665,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.927819521370111e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.927819521370111e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18659536838531493,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2453050583600998,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023324421606957914,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023324421606957914,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12652845978736876,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16868894398212433,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015816058265045285,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015816058265045285,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06248385086655617,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08090667426586151,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007810481358319521,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007810481358319521,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0859490916132927,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1114412397146225,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001074363652151078,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001074363652151078,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02253422886133194,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.029314831271767615,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002253422932699323,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002253422932699323,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3136689722537994,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38422879576683044,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031366899609565735,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031366899609565735,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.0882601384128311,
|
|
"calibration/batch_distribution_entropy": 0.9181279188601558,
|
|
"calibration/buffer_distribution_entropy": 0.9849184133852686,
|
|
"calibration/confidence_entropy": 0.4608181815223483,
|
|
"calibration/coverage@0%": 0.0932575574469796,
|
|
"calibration/coverage@1%": 0.16929922411364628,
|
|
"calibration/coverage@10%": 0.708789194587728,
|
|
"calibration/coverage@15%": 0.8072679880100925,
|
|
"calibration/coverage@20%": 0.8885278567821299,
|
|
"calibration/coverage@25%": 0.9346248385618464,
|
|
"calibration/coverage@30%": 0.965708453110028,
|
|
"calibration/coverage@5%": 0.4389986003230856,
|
|
"calibration/ece": 0.11500486159553133,
|
|
"calibration/mean_confidence": 0.6482842382180847,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012413194444444465,
|
|
"completions/max_length": 3864.2,
|
|
"completions/max_terminated_length": 3864.2,
|
|
"completions/mean_length": 863.4512329101562,
|
|
"completions/mean_terminated_length": 874.2814331054688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 239.2,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 0.0003908317012246698,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0103,
|
|
"num_tokens": 392541770.0,
|
|
"reward": 1.017712938785553,
|
|
"reward_std": 0.12957948297262192,
|
|
"rewards/accuracy_reward": 0.7276041507720947,
|
|
"rewards/batch_coverage_0": 0.30742999315261843,
|
|
"rewards/batch_coverage_1": 0.30742999315261843,
|
|
"rewards/batch_coverage_5": 0.3649780869483948,
|
|
"rewards/brier_reward": 0.8220609188079834,
|
|
"rewards/confidence_uniqueness_reward": 0.9339541792869568,
|
|
"rewards/format_reward": 0.987413203716278,
|
|
"rewards/frontier_aurc_reward": -0.0011829802067950367,
|
|
"rewards/frontier_coverage_10": 0.0098116684705019,
|
|
"rewards/frontier_coverage_15": 0.025499676587060094,
|
|
"rewards/frontier_coverage_20": 0.0708703301846981,
|
|
"rewards/frontier_coverage_25": 0.16849485635757447,
|
|
"rewards/frontier_ece_reward": 0.0016525331884622574,
|
|
"rewards/frontier_entropy_batch_reward": -0.3122913181781769,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14816623330116271,
|
|
"signal/accuracy_reward/group_std_mean": 0.20093845427036286,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07408311665058136,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07408311665058136,
|
|
"signal/advantage_abs_mean": 0.09465723633766174,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09465723633766174,
|
|
"signal/advantage_pre_scale_std": 0.15696861743927001,
|
|
"signal/advantage_std": 0.15696861743927001,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15024828910827637,
|
|
"signal/batch_coverage_0/group_std_mean": 0.19131266176700593,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.001878103637136519,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001878103637136519,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15024828910827637,
|
|
"signal/batch_coverage_1/group_std_mean": 0.19131266176700593,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.001878103637136519,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001878103637136519,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.16728052496910095,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2136065810918808,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002091006631962955,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002091006631962955,
|
|
"signal/brier_reward/centered_abs_mean": 0.12385228276252747,
|
|
"signal/brier_reward/group_std_mean": 0.16257039606571197,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012385228648781776,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012385228648781776,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0335435301065445,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05371822491288185,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033543531782925127,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033543531782925127,
|
|
"signal/format_reward/centered_abs_mean": 0.02024197019636631,
|
|
"signal/format_reward/group_std_mean": 0.037736759334802625,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010120985098183155,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010120985098183155,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016765848034992815,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003001072024926543,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.095731033477932e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.095731033477932e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16433679759502412,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21810666620731353,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002054209914058447,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002054209914058447,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09619698077440261,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12904551327228547,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012024622410535812,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012024622410535812,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06167029365897179,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07872337400913239,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007708786986768245,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007708786986768245,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09959283322095872,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12883218973875046,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001244910410605371,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001244910410605371,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.020044444501399993,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02636742815375328,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020044445060193538,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020044445060193538,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3269516587257385,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3963646709918976,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0326951652765274,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326951652765274,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.11770260909636773,
|
|
"calibration/batch_distribution_entropy": 0.9579462020628409,
|
|
"calibration/buffer_distribution_entropy": 0.9845349636672542,
|
|
"calibration/confidence_entropy": 0.4837524310084838,
|
|
"calibration/coverage@0%": 0.04676566128454357,
|
|
"calibration/coverage@1%": 0.04676566128454357,
|
|
"calibration/coverage@10%": 0.5410517256389931,
|
|
"calibration/coverage@15%": 0.6916496173493007,
|
|
"calibration/coverage@20%": 0.8193747798843626,
|
|
"calibration/coverage@25%": 0.9156683314771754,
|
|
"calibration/coverage@30%": 0.9687098953056401,
|
|
"calibration/coverage@5%": 0.30845293421382525,
|
|
"calibration/ece": 0.1428853576171553,
|
|
"calibration/mean_confidence": 0.5829123842743895,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016840277777777767,
|
|
"completions/max_length": 3886.0,
|
|
"completions/max_terminated_length": 3886.0,
|
|
"completions/mean_length": 917.1794311523438,
|
|
"completions/mean_terminated_length": 932.885546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 232.6,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 0.0003793597570620477,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.015,
|
|
"num_tokens": 406215645.0,
|
|
"reward": 1.0036607384681702,
|
|
"reward_std": 0.14257338047027587,
|
|
"rewards/accuracy_reward": 0.7044270992279053,
|
|
"rewards/batch_coverage_0": 0.27710092067718506,
|
|
"rewards/batch_coverage_1": 0.27710092067718506,
|
|
"rewards/batch_coverage_5": 0.3239390730857849,
|
|
"rewards/brier_reward": 0.808384120464325,
|
|
"rewards/confidence_uniqueness_reward": 0.9317871928215027,
|
|
"rewards/format_reward": 0.9831597089767456,
|
|
"rewards/frontier_aurc_reward": -0.001271896972320974,
|
|
"rewards/frontier_coverage_10": 0.011056367494165897,
|
|
"rewards/frontier_coverage_15": 0.02494898848235607,
|
|
"rewards/frontier_coverage_20": 0.07640131264925003,
|
|
"rewards/frontier_coverage_25": 0.1671406090259552,
|
|
"rewards/frontier_ece_reward": 0.0012046609073877335,
|
|
"rewards/frontier_entropy_batch_reward": -0.2872548520565033,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1644802510738373,
|
|
"signal/accuracy_reward/group_std_mean": 0.22126680314540864,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36111111044883726,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08224012553691865,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08224012553691865,
|
|
"signal/advantage_abs_mean": 0.10343722999095917,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10343722999095917,
|
|
"signal/advantage_pre_scale_std": 0.1708647608757019,
|
|
"signal/advantage_std": 0.1708647608757019,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15582129061222078,
|
|
"signal/batch_coverage_0/group_std_mean": 0.2000853270292282,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0019477661699056625,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019477661699056625,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15582129061222078,
|
|
"signal/batch_coverage_1/group_std_mean": 0.2000853270292282,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0019477661699056625,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019477661699056625,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.16923550069332122,
|
|
"signal/batch_coverage_5/group_std_mean": 0.21692816615104676,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0021154437214136123,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021154437214136123,
|
|
"signal/brier_reward/centered_abs_mean": 0.1347814291715622,
|
|
"signal/brier_reward/group_std_mean": 0.17461273968219757,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013478142954409123,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013478142954409123,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03903680965304375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.061811356246471404,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0039036809001117946,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0039036809001117946,
|
|
"signal/format_reward/centered_abs_mean": 0.02781032994389534,
|
|
"signal/format_reward/group_std_mean": 0.0484844371676445,
|
|
"signal/format_reward/group_zero_std_frac": 0.8138889074325562,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01390516497194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01390516497194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016947093186900019,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003017672011628747,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1183867465879304e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1183867465879304e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18022984266281128,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2389580488204956,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002252873033285141,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002252873033285141,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09130481630563736,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.12277543842792511,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001141310203820467,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001141310203820467,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06616577729582787,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08398585319519043,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000827072246465832,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000827072246465832,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1071733370423317,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13738629817962647,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013396667782217264,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013396667782217264,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.021149307116866113,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.027549223601818086,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021149307489395142,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021149307489395142,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3150637447834015,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3849453806877136,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03150637447834015,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03150637447834015,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.07757748100166686,
|
|
"calibration/batch_distribution_entropy": 0.9508302217171369,
|
|
"calibration/buffer_distribution_entropy": 0.983970957900999,
|
|
"calibration/confidence_entropy": 0.48246487867849785,
|
|
"calibration/coverage@0%": 0.06952750269639216,
|
|
"calibration/coverage@1%": 0.1317966319839911,
|
|
"calibration/coverage@10%": 0.6955960358763023,
|
|
"calibration/coverage@15%": 0.852801106493976,
|
|
"calibration/coverage@20%": 0.9429961823816573,
|
|
"calibration/coverage@25%": 0.9671957671957673,
|
|
"calibration/coverage@30%": 0.9904761904761905,
|
|
"calibration/coverage@5%": 0.47816170763690363,
|
|
"calibration/ece": 0.17113750660863866,
|
|
"calibration/mean_confidence": 0.617869366140711,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0203125,
|
|
"completions/max_length": 3923.2,
|
|
"completions/max_terminated_length": 3923.2,
|
|
"completions/mean_length": 885.2420166015625,
|
|
"completions/mean_terminated_length": 903.5501708984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 271.4,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 0.0003578953619580716,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.0174,
|
|
"num_tokens": 419513601.0,
|
|
"reward": 1.0005189657211304,
|
|
"reward_std": 0.14266968369483948,
|
|
"rewards/accuracy_reward": 0.7103298544883728,
|
|
"rewards/batch_coverage_0": 0.28121693432331085,
|
|
"rewards/batch_coverage_1": 0.28121693432331085,
|
|
"rewards/batch_coverage_5": 0.32759142518043516,
|
|
"rewards/brier_reward": 0.7978660702705384,
|
|
"rewards/confidence_uniqueness_reward": 0.9267572522163391,
|
|
"rewards/format_reward": 0.9796875,
|
|
"rewards/frontier_aurc_reward": -0.0019006900722160936,
|
|
"rewards/frontier_coverage_10": 0.00238664704374969,
|
|
"rewards/frontier_coverage_15": 0.02290529925376177,
|
|
"rewards/frontier_coverage_20": 0.08024742603302001,
|
|
"rewards/frontier_coverage_25": 0.17269090414047242,
|
|
"rewards/frontier_ece_reward": -7.713742525083944e-06,
|
|
"rewards/frontier_entropy_batch_reward": -0.3153074085712433,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16074761152267455,
|
|
"signal/accuracy_reward/group_std_mean": 0.20859464704990388,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08037380576133728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08037380576133728,
|
|
"signal/advantage_abs_mean": 0.10613918602466584,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10613918602466584,
|
|
"signal/advantage_pre_scale_std": 0.17568522989749907,
|
|
"signal/advantage_std": 0.17568522989749907,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15213667154312133,
|
|
"signal/batch_coverage_0/group_std_mean": 0.19567765295505524,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.001901708380319178,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.001901708380319178,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15213667154312133,
|
|
"signal/batch_coverage_1/group_std_mean": 0.19567765295505524,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.001901708380319178,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.001901708380319178,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.16584835946559906,
|
|
"signal/batch_coverage_5/group_std_mean": 0.21259851455688478,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0020731045166030525,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020731045166030525,
|
|
"signal/brier_reward/centered_abs_mean": 0.1359453946352005,
|
|
"signal/brier_reward/group_std_mean": 0.17669061124324797,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01359453983604908,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01359453983604908,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04342151135206222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06899276375770569,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004342151107266545,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004342151107266545,
|
|
"signal/format_reward/centered_abs_mean": 0.03164062537252903,
|
|
"signal/format_reward/group_std_mean": 0.05509527325630188,
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015820312686264514,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015820312686264514,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021755524445325135,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038825182244181635,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7194406720809637e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7194406720809637e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17187611758708954,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22681587934494019,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002148451516404748,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002148451516404748,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07667210996150971,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10228205025196076,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009584014187566936,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009584014187566936,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07023517787456512,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.089180588722229,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008779397583566606,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008779397583566606,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11487879753112792,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14849914908409118,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014359849970787763,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014359849970787763,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.020141305401921272,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.026219186559319495,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020141306333243845,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020141306333243845,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3288719952106476,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39527170062065126,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03288719952106476,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03288719952106476,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1818975589715942,
|
|
"calibration/batch_distribution_entropy": 0.9567355915721976,
|
|
"calibration/buffer_distribution_entropy": 0.9835585882937675,
|
|
"calibration/confidence_entropy": 0.49104115356341216,
|
|
"calibration/coverage@0%": 0.039110295481716614,
|
|
"calibration/coverage@1%": 0.039110295481716614,
|
|
"calibration/coverage@10%": 0.18053049119841696,
|
|
"calibration/coverage@15%": 0.49428874053537813,
|
|
"calibration/coverage@20%": 0.8066940010656737,
|
|
"calibration/coverage@25%": 0.898952879581152,
|
|
"calibration/coverage@30%": 0.9267015706806283,
|
|
"calibration/coverage@5%": 0.08188014456308144,
|
|
"calibration/ece": 0.18851791048089234,
|
|
"calibration/mean_confidence": 0.5862262140054744,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01987847222222221,
|
|
"completions/max_length": 3764.4,
|
|
"completions/max_terminated_length": 3764.4,
|
|
"completions/mean_length": 896.888623046875,
|
|
"completions/mean_terminated_length": 915.1306640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 235.4,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.00039063894655555487,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.0163,
|
|
"num_tokens": 432935806.0,
|
|
"reward": 0.9909634113311767,
|
|
"reward_std": 0.14615142345428467,
|
|
"rewards/accuracy_reward": 0.6793402791023254,
|
|
"rewards/batch_coverage_0": 0.2794734358787537,
|
|
"rewards/batch_coverage_1": 0.2794734358787537,
|
|
"rewards/batch_coverage_5": 0.323152631521225,
|
|
"rewards/brier_reward": 0.7977786898612976,
|
|
"rewards/confidence_uniqueness_reward": 0.9308565855026245,
|
|
"rewards/format_reward": 0.9801215291023254,
|
|
"rewards/frontier_aurc_reward": -0.0015429545659571887,
|
|
"rewards/frontier_coverage_10": 0.018955985829234124,
|
|
"rewards/frontier_coverage_15": 0.03628319762647152,
|
|
"rewards/frontier_coverage_20": 0.08039498776197433,
|
|
"rewards/frontier_coverage_25": 0.16077298521995545,
|
|
"rewards/frontier_ece_reward": 0.0002475063287420198,
|
|
"rewards/frontier_entropy_batch_reward": -0.263678240776062,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1656684011220932,
|
|
"signal/accuracy_reward/group_std_mean": 0.21772934198379518,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0828342005610466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0828342005610466,
|
|
"signal/advantage_abs_mean": 0.10910950750112533,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10910950750112533,
|
|
"signal/advantage_pre_scale_std": 0.1739081084728241,
|
|
"signal/advantage_std": 0.1739081084728241,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15024437606334687,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1934993803501129,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0018780547194182872,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018780547194182872,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15024437606334687,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1934993803501129,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0018780547194182872,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018780547194182872,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.161685311794281,
|
|
"signal/batch_coverage_5/group_std_mean": 0.20772615373134612,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0020210665417835115,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020210665417835115,
|
|
"signal/brier_reward/centered_abs_mean": 0.1355786770582199,
|
|
"signal/brier_reward/group_std_mean": 0.17508684396743773,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013557868637144565,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013557868637144565,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040959518402814865,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0655187301337719,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004095951886847615,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004095951886847615,
|
|
"signal/format_reward/centered_abs_mean": 0.03091905377805233,
|
|
"signal/format_reward/group_std_mean": 0.053562342375516894,
|
|
"signal/format_reward/group_zero_std_frac": 0.7916666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015459526889026165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015459526889026165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018253508023917675,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031713686417788266,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2816885757492855e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2816885757492855e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1804261773824692,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23783507347106933,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002255327207967639,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002255327207967639,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06934027075767517,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09224382489919662,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008667534217238426,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008667534217238426,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06672045737504959,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08622078448534012,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000834005733486265,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000834005733486265,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10972451716661454,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14255777895450591,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013715565204620362,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013715565204620362,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.020347204804420472,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.026265929639339446,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020347204990684986,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020347204990684986,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30738086700439454,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37744983434677126,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03073808699846268,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03073808699846268,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15063273591909837,
|
|
"calibration/batch_distribution_entropy": 0.9388096875691513,
|
|
"calibration/buffer_distribution_entropy": 0.9826716622231875,
|
|
"calibration/confidence_entropy": 0.5015073738095571,
|
|
"calibration/coverage@0%": 0.053256496819308975,
|
|
"calibration/coverage@1%": 0.0808427037158607,
|
|
"calibration/coverage@10%": 0.36639852102059134,
|
|
"calibration/coverage@15%": 0.5280464859928335,
|
|
"calibration/coverage@20%": 0.7723397872167156,
|
|
"calibration/coverage@25%": 0.9513048245614035,
|
|
"calibration/coverage@30%": 0.9905263157894737,
|
|
"calibration/coverage@5%": 0.199966045344166,
|
|
"calibration/ece": 0.17788109724575601,
|
|
"calibration/mean_confidence": 0.6104825636089752,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012500000000000022,
|
|
"completions/max_length": 3865.0,
|
|
"completions/max_terminated_length": 3865.0,
|
|
"completions/mean_length": 888.7177978515625,
|
|
"completions/mean_terminated_length": 899.983154296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 246.0,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.0003803574072662741,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0092,
|
|
"num_tokens": 446256779.0,
|
|
"reward": 1.018767237663269,
|
|
"reward_std": 0.13558549880981446,
|
|
"rewards/accuracy_reward": 0.7313368082046509,
|
|
"rewards/batch_coverage_0": 0.2717293441295624,
|
|
"rewards/batch_coverage_1": 0.2717293441295624,
|
|
"rewards/batch_coverage_5": 0.32184417843818663,
|
|
"rewards/brier_reward": 0.8048649549484252,
|
|
"rewards/confidence_uniqueness_reward": 0.9368955492973328,
|
|
"rewards/format_reward": 0.9875,
|
|
"rewards/frontier_aurc_reward": -0.0015228277770802379,
|
|
"rewards/frontier_coverage_10": -0.008218994364142418,
|
|
"rewards/frontier_coverage_15": 0.03318442739546299,
|
|
"rewards/frontier_coverage_20": 0.09056447297334672,
|
|
"rewards/frontier_coverage_25": 0.1810651034116745,
|
|
"rewards/frontier_ece_reward": -0.0035485433822032065,
|
|
"rewards/frontier_entropy_batch_reward": -0.2897710233926773,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16082356870174408,
|
|
"signal/accuracy_reward/group_std_mean": 0.20871648490428923,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08041178435087204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08041178435087204,
|
|
"signal/advantage_abs_mean": 0.10099045187234879,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10099045187234879,
|
|
"signal/advantage_pre_scale_std": 0.1624716430902481,
|
|
"signal/advantage_std": 0.1624716430902481,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.14960849285125732,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1894178122282028,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0018701060907915235,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018701060907915235,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.14960849285125732,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1894178122282028,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0018701060907915235,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018701060907915235,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1641196310520172,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2089547961950302,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.002051495388150215,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.002051495388150215,
|
|
"signal/brier_reward/centered_abs_mean": 0.13183022737503053,
|
|
"signal/brier_reward/group_std_mean": 0.1714376240968704,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013183023221790791,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013183023221790791,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03192863427102566,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0548668347299099,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003192863380536437,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003192863380536437,
|
|
"signal/format_reward/centered_abs_mean": 0.020833333395421504,
|
|
"signal/format_reward/group_std_mean": 0.04174697957932949,
|
|
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010416666697710752,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010416666697710752,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018504442647099494,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034003911074250936,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3130554109229706e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3130554109229706e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18468638360500336,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23832190036773682,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002308579720556736,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002308579720556736,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0668262206017971,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08740047812461853,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008353277808055282,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008353277808055282,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07043667733669282,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0902448683977127,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008804585319012404,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008804585319012404,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11264470964670181,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14612998068332672,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014080588938668371,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014080588938668371,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.020465316995978356,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.025910362601280212,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002046531718224287,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002046531718224287,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32341882586479187,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39242342710494993,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032341883331537244,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032341883331537244,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17671124703371058,
|
|
"calibration/batch_distribution_entropy": 0.98061718650997,
|
|
"calibration/buffer_distribution_entropy": 0.9832519295423037,
|
|
"calibration/confidence_entropy": 0.49134951478877886,
|
|
"calibration/coverage@0%": 0.012995178771143123,
|
|
"calibration/coverage@1%": 0.012995178771143123,
|
|
"calibration/coverage@10%": 0.37411120365159467,
|
|
"calibration/coverage@15%": 0.4711492793715818,
|
|
"calibration/coverage@20%": 0.5555846153655415,
|
|
"calibration/coverage@25%": 0.8815871516296646,
|
|
"calibration/coverage@30%": 0.9270215471821519,
|
|
"calibration/coverage@5%": 0.23632671438398606,
|
|
"calibration/ece": 0.20229081024109904,
|
|
"calibration/mean_confidence": 0.5436326881346589,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01953125,
|
|
"completions/max_length": 3952.4,
|
|
"completions/max_terminated_length": 3952.4,
|
|
"completions/mean_length": 922.8704956054687,
|
|
"completions/mean_terminated_length": 941.5363037109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 225.6,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.00038015254540368915,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.0157,
|
|
"num_tokens": 459969111.0,
|
|
"reward": 0.9897091388702393,
|
|
"reward_std": 0.1438922643661499,
|
|
"rewards/accuracy_reward": 0.6790798544883728,
|
|
"rewards/batch_coverage_0": 0.271970134973526,
|
|
"rewards/batch_coverage_1": 0.271970134973526,
|
|
"rewards/batch_coverage_5": 0.29821199774742124,
|
|
"rewards/brier_reward": 0.7866911649703979,
|
|
"rewards/confidence_uniqueness_reward": 0.9321794986724854,
|
|
"rewards/format_reward": 0.98046875,
|
|
"rewards/frontier_aurc_reward": -0.001562349358573556,
|
|
"rewards/frontier_coverage_10": 0.01311279283836484,
|
|
"rewards/frontier_coverage_15": 0.03557181544601917,
|
|
"rewards/frontier_coverage_20": 0.08295716494321823,
|
|
"rewards/frontier_coverage_25": 0.15684463679790497,
|
|
"rewards/frontier_ece_reward": -0.0021067145047709347,
|
|
"rewards/frontier_entropy_batch_reward": -0.2585505545139313,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16019422709941863,
|
|
"signal/accuracy_reward/group_std_mean": 0.21169654428958892,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08009711354970932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08009711354970932,
|
|
"signal/advantage_abs_mean": 0.10677080303430557,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10677080303430557,
|
|
"signal/advantage_pre_scale_std": 0.1714014321565628,
|
|
"signal/advantage_std": 0.1714014321565628,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.1564124494791031,
|
|
"signal/batch_coverage_0/group_std_mean": 0.20219504535198213,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0019551556790247558,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019551556790247558,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.1564124494791031,
|
|
"signal/batch_coverage_1/group_std_mean": 0.20219504535198213,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0019551556790247558,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019551556790247558,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.16378523409366608,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2116617739200592,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0020473155193030832,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0020473155193030832,
|
|
"signal/brier_reward/centered_abs_mean": 0.14255260676145554,
|
|
"signal/brier_reward/group_std_mean": 0.18385781347751617,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01425526086241007,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01425526086241007,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039196794480085374,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06418116241693497,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003919679578393698,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003919679578393698,
|
|
"signal/format_reward/centered_abs_mean": 0.03014865517616272,
|
|
"signal/format_reward/group_std_mean": 0.053524895757436755,
|
|
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01507432758808136,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01507432758808136,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018353921128436922,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003300427459180355,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.294240257469937e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.294240257469937e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17010368406772614,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22576397955417632,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021262960508465767,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021262960508465767,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06435753330588341,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08455722033977509,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008044691872783005,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008044691872783005,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07232896238565445,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09357113689184189,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009041120298206806,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009041120298206806,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11574404090642929,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14936714470386506,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014468005392700434,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014468005392700434,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02034129723906517,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02622072398662567,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00203412976115942,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00203412976115942,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31321715712547304,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3855625748634338,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03132171593606472,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03132171593606472,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.13757158602200054,
|
|
"calibration/batch_distribution_entropy": 0.9579692523446468,
|
|
"calibration/buffer_distribution_entropy": 0.9831869417924292,
|
|
"calibration/confidence_entropy": 0.5063417819648874,
|
|
"calibration/coverage@0%": 0.043145462080397844,
|
|
"calibration/coverage@1%": 0.06998756734355574,
|
|
"calibration/coverage@10%": 0.3950514455377844,
|
|
"calibration/coverage@15%": 0.6081583390352494,
|
|
"calibration/coverage@20%": 0.8909739094554915,
|
|
"calibration/coverage@25%": 0.9334046037978933,
|
|
"calibration/coverage@30%": 0.9611548556430446,
|
|
"calibration/coverage@5%": 0.24764067107914797,
|
|
"calibration/ece": 0.17464724959446315,
|
|
"calibration/mean_confidence": 0.6019992007131725,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012413194444444442,
|
|
"completions/max_length": 3799.0,
|
|
"completions/max_terminated_length": 3799.0,
|
|
"completions/mean_length": 896.29296875,
|
|
"completions/mean_terminated_length": 907.60849609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 268.0,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.0003703467664308846,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0092,
|
|
"num_tokens": 473362214.0,
|
|
"reward": 1.0050120234489441,
|
|
"reward_std": 0.1314167633652687,
|
|
"rewards/accuracy_reward": 0.7024305701255799,
|
|
"rewards/batch_coverage_0": 0.2870294451713562,
|
|
"rewards/batch_coverage_1": 0.2870294451713562,
|
|
"rewards/batch_coverage_5": 0.32983245253562926,
|
|
"rewards/brier_reward": 0.799726414680481,
|
|
"rewards/confidence_uniqueness_reward": 0.9372918605804443,
|
|
"rewards/format_reward": 0.987499988079071,
|
|
"rewards/frontier_aurc_reward": -0.0017026964342221618,
|
|
"rewards/frontier_coverage_10": 0.008682099310681224,
|
|
"rewards/frontier_coverage_15": 0.03962989971041679,
|
|
"rewards/frontier_coverage_20": 0.09496284425258636,
|
|
"rewards/frontier_coverage_25": 0.1753475546836853,
|
|
"rewards/frontier_ece_reward": -0.0030850290786474943,
|
|
"rewards/frontier_entropy_batch_reward": -0.28606693148612977,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14751518964767457,
|
|
"signal/accuracy_reward/group_std_mean": 0.1944108635187149,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44166666865348814,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07375759482383729,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07375759482383729,
|
|
"signal/advantage_abs_mean": 0.09806355237960815,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09806355237960815,
|
|
"signal/advantage_pre_scale_std": 0.15812867879867554,
|
|
"signal/advantage_std": 0.15812867879867554,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.14468889236450194,
|
|
"signal/batch_coverage_0/group_std_mean": 0.18608545362949372,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0018086112104356289,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018086112104356289,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.14468889236450194,
|
|
"signal/batch_coverage_1/group_std_mean": 0.18608545362949372,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0018086112104356289,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018086112104356289,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.15795843303203583,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2035380184650421,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0019744804361835123,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019744804361835123,
|
|
"signal/brier_reward/centered_abs_mean": 0.1314527153968811,
|
|
"signal/brier_reward/group_std_mean": 0.171079358458519,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013145271316170692,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013145271316170692,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03220085538923741,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.053303804248571396,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003220085659995675,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003220085659995675,
|
|
"signal/format_reward/centered_abs_mean": 0.021148003078997134,
|
|
"signal/format_reward/group_std_mean": 0.04009667597711086,
|
|
"signal/format_reward/group_zero_std_frac": 0.8361111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010574001539498567,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010574001539498567,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018965400056913495,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033055396750569345,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3706751017016357e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3706751017016357e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1506176620721817,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19978395700454712,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018827208783477544,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018827208783477544,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.059682004153728485,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07820275872945785,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007460250286385417,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007460250286385417,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07458705455064774,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09559879004955292,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009323381935246288,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009323381935246288,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11781590729951859,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15172553658485413,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014726989204064012,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014726989204064012,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019530902430415154,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.024914587289094924,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019530903082340956,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019530903082340956,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3246802628040314,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39551456570625304,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03246802762150765,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03246802762150765,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 0.1412181343363796,
|
|
"eval_calibration/batch_distribution_entropy": 0.9271140308478684,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9824580269394576,
|
|
"eval_calibration/confidence_entropy": 0.5147483922147414,
|
|
"eval_calibration/coverage@0%": 0.204133064516129,
|
|
"eval_calibration/coverage@1%": 0.204133064516129,
|
|
"eval_calibration/coverage@10%": 0.5497311827956989,
|
|
"eval_calibration/coverage@15%": 0.640625,
|
|
"eval_calibration/coverage@20%": 0.817372311827957,
|
|
"eval_calibration/coverage@25%": 0.9301075268817204,
|
|
"eval_calibration/coverage@30%": 0.9731182795698925,
|
|
"eval_calibration/coverage@5%": 0.204133064516129,
|
|
"eval_calibration/ece": 0.23087892427736922,
|
|
"eval_calibration/mean_confidence": 0.5610063740429124,
|
|
"eval_completions/clipped_ratio": 0.018229166666666685,
|
|
"eval_completions/max_length": 3073.1666666666665,
|
|
"eval_completions/max_terminated_length": 3073.1666666666665,
|
|
"eval_completions/mean_length": 913.9673868815104,
|
|
"eval_completions/mean_terminated_length": 931.3572896321615,
|
|
"eval_completions/min_length": 72.16666666666667,
|
|
"eval_completions/min_terminated_length": 309.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 473362214.0,
|
|
"eval_reward": 0.9203121761480967,
|
|
"eval_reward_std": 0.24826606611410776,
|
|
"eval_rewards/accuracy_reward": 0.714409718910853,
|
|
"eval_rewards/batch_coverage_0": 0.009844350046478212,
|
|
"eval_rewards/batch_coverage_1": 0.009844350046478212,
|
|
"eval_rewards/batch_coverage_5": 0.021961650578305125,
|
|
"eval_rewards/brier_reward": 0.7897129555543264,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8787291546662649,
|
|
"eval_rewards/format_reward": 0.9809027910232544,
|
|
"eval_rewards/frontier_aurc_reward": -0.0012339799917147805,
|
|
"eval_rewards/frontier_coverage_10": -0.0048470275942236185,
|
|
"eval_rewards/frontier_coverage_15": 0.03648912844558557,
|
|
"eval_rewards/frontier_coverage_20": 0.09729731952150662,
|
|
"eval_rewards/frontier_coverage_25": 0.18007576217254004,
|
|
"eval_rewards/frontier_ece_reward": -0.00465917030426984,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9809027910232544,
|
|
"eval_runtime": 210.5167,
|
|
"eval_samples_per_second": 4.75,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.3953450520833333,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.44999276598294574,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.19767252604166666,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19767252604166666,
|
|
"eval_signal/advantage_abs_mean": 0.20577156295379004,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20577156295379004,
|
|
"eval_signal/advantage_pre_scale_std": 0.24673331280549368,
|
|
"eval_signal/advantage_std": 0.24673331280549368,
|
|
"eval_signal/batch_coverage_0/centered_abs_mean": 0.16214851662516594,
|
|
"eval_signal/batch_coverage_0/group_std_mean": 0.24001923451821008,
|
|
"eval_signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.00202685648885866,
|
|
"eval_signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.00202685648885866,
|
|
"eval_signal/batch_coverage_1/centered_abs_mean": 0.16214851662516594,
|
|
"eval_signal/batch_coverage_1/group_std_mean": 0.24001923451821008,
|
|
"eval_signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.00202685648885866,
|
|
"eval_signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.00202685648885866,
|
|
"eval_signal/batch_coverage_5/centered_abs_mean": 0.15619109819332758,
|
|
"eval_signal/batch_coverage_5/group_std_mean": 0.22943883637587228,
|
|
"eval_signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0019523888283098738,
|
|
"eval_signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019523888283098738,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19247209032376608,
|
|
"eval_signal/brier_reward/group_std_mean": 0.25090829531351727,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019247209032376606,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019247209032376606,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.060435134917497635,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11479903633395831,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0060435136159261065,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0060435136159261065,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.03667534670482079,
|
|
"eval_signal/format_reward/group_std_mean": 0.09906197773913543,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.472222238779068,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018337673352410395,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.018337673352410395,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002071200249095758,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004666941861311595,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5890003447178362e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5890003447178362e-05,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.22067337234814963,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.32334270576636,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027584172397231064,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027584172397231064,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.07596691697835922,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.10353057583173116,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009495864602892349,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009495864602892349,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1193230574329694,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.15158799042304358,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014915381907485425,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014915381907485425,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.21610286831855774,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.26547010242938995,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027012857996548214,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027012857996548214,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.028222967870533466,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.03641134003798167,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002822296771531304,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002822296771531304,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.03667534670482079,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.09906197773913543,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.472222238779068,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0036675347558533153,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0036675347558533153,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.147852153867797,
|
|
"calibration/batch_distribution_entropy": 0.938298864783472,
|
|
"calibration/buffer_distribution_entropy": 0.9827790201087444,
|
|
"calibration/confidence_entropy": 0.47021388206284576,
|
|
"calibration/coverage@0%": 0.028338895407133958,
|
|
"calibration/coverage@1%": 0.028338895407133958,
|
|
"calibration/coverage@10%": 0.3739227408221226,
|
|
"calibration/coverage@15%": 0.6042535687163965,
|
|
"calibration/coverage@20%": 0.8655461602919688,
|
|
"calibration/coverage@25%": 0.9150645087569858,
|
|
"calibration/coverage@30%": 0.9482849604221636,
|
|
"calibration/coverage@5%": 0.10740093250846247,
|
|
"calibration/ece": 0.14054002522189812,
|
|
"calibration/mean_confidence": 0.621981564051134,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014930555555555558,
|
|
"completions/max_length": 3687.8,
|
|
"completions/max_terminated_length": 3687.8,
|
|
"completions/mean_length": 910.7903686523438,
|
|
"completions/mean_terminated_length": 924.5707641601563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 235.6,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 0.00031697930535301566,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0101,
|
|
"num_tokens": 486920471.0,
|
|
"reward": 1.0269575834274292,
|
|
"reward_std": 0.13247913420200347,
|
|
"rewards/accuracy_reward": 0.7473958373069763,
|
|
"rewards/batch_coverage_0": 0.2986012935638428,
|
|
"rewards/batch_coverage_1": 0.2986012935638428,
|
|
"rewards/batch_coverage_5": 0.35023786425590514,
|
|
"rewards/brier_reward": 0.8035080075263977,
|
|
"rewards/confidence_uniqueness_reward": 0.9341195702552796,
|
|
"rewards/format_reward": 0.9849826335906983,
|
|
"rewards/frontier_aurc_reward": -0.0013065816019661725,
|
|
"rewards/frontier_coverage_10": -0.0037699075415730476,
|
|
"rewards/frontier_coverage_15": 0.046929216384887694,
|
|
"rewards/frontier_coverage_20": 0.11887629777193069,
|
|
"rewards/frontier_coverage_25": 0.21391085684299468,
|
|
"rewards/frontier_ece_reward": -0.005632767057977617,
|
|
"rewards/frontier_entropy_batch_reward": -0.28957144618034364,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1449761286377907,
|
|
"signal/accuracy_reward/group_std_mean": 0.19491939544677733,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43333333134651186,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07248806431889535,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07248806431889535,
|
|
"signal/advantage_abs_mean": 0.09573410004377365,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09573410004377365,
|
|
"signal/advantage_pre_scale_std": 0.16088206470012664,
|
|
"signal/advantage_std": 0.16088206470012664,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.15816357731819153,
|
|
"signal/batch_coverage_0/group_std_mean": 0.1993831902742386,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0019770448561757803,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0019770448561757803,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.15816357731819153,
|
|
"signal/batch_coverage_1/group_std_mean": 0.1993831902742386,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0019770448561757803,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0019770448561757803,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.17250239551067353,
|
|
"signal/batch_coverage_5/group_std_mean": 0.2175464391708374,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0021562800277024506,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0021562800277024506,
|
|
"signal/brier_reward/centered_abs_mean": 0.1319777175784111,
|
|
"signal/brier_reward/group_std_mean": 0.17091013491153717,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013197771646082402,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013197771646082402,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03617449998855591,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06037556529045105,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036174500361084937,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036174500361084937,
|
|
"signal/format_reward/centered_abs_mean": 0.02501627616584301,
|
|
"signal/format_reward/group_std_mean": 0.04717556312680245,
|
|
"signal/format_reward/group_zero_std_frac": 0.8055555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012508138082921505,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012508138082921505,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015603711362928152,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0028045469196513297,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.950463993125595e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.950463993125595e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14366675317287445,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18975805938243867,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001795834512449801,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001795834512449801,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06306189298629761,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0808226153254509,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007882736972533166,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007882736972533166,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07929624915122986,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10114771872758865,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009912031586281956,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009912031586281956,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1190194845199585,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15333056151866914,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014877435052767396,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014877435052767396,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019452205300331114,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0247601967304945,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001945220516063273,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001945220516063273,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3261309266090393,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3921803832054138,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03261309340596199,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03261309340596199,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.09695432525621911,
|
|
"calibration/batch_distribution_entropy": 0.9552333976117616,
|
|
"calibration/buffer_distribution_entropy": 0.9832264695519918,
|
|
"calibration/confidence_entropy": 0.5117326285185179,
|
|
"calibration/coverage@0%": 0.05908567566814266,
|
|
"calibration/coverage@1%": 0.05908567566814266,
|
|
"calibration/coverage@10%": 0.6126286677199145,
|
|
"calibration/coverage@15%": 0.7595217028875668,
|
|
"calibration/coverage@20%": 0.9030759923681884,
|
|
"calibration/coverage@25%": 0.972632509656103,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.38115290869194784,
|
|
"calibration/ece": 0.155695649150309,
|
|
"calibration/mean_confidence": 0.601508712718853,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01244212962962965,
|
|
"completions/max_length": 3836.3333333333335,
|
|
"completions/max_terminated_length": 3836.3333333333335,
|
|
"completions/mean_length": 925.7978922526041,
|
|
"completions/mean_terminated_length": 937.7610880533854,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 218.0,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 495185634.0,
|
|
"reward": 1.004624883333842,
|
|
"reward_std": 0.13393393407265344,
|
|
"rewards/accuracy_reward": 0.7026909589767456,
|
|
"rewards/batch_coverage_0": 0.27519936362902325,
|
|
"rewards/batch_coverage_1": 0.27519936362902325,
|
|
"rewards/batch_coverage_5": 0.3106227219104767,
|
|
"rewards/brier_reward": 0.7956988414128622,
|
|
"rewards/confidence_uniqueness_reward": 0.9369119803110758,
|
|
"rewards/format_reward": 0.9875578681627909,
|
|
"rewards/frontier_aurc_reward": -0.0013301618164405227,
|
|
"rewards/frontier_coverage_10": 0.007185542335112889,
|
|
"rewards/frontier_coverage_15": 0.046093116203943886,
|
|
"rewards/frontier_coverage_20": 0.10647296160459518,
|
|
"rewards/frontier_coverage_25": 0.1870871682961782,
|
|
"rewards/frontier_ece_reward": -0.0040694084794571,
|
|
"rewards/frontier_entropy_batch_reward": -0.28435274461905163,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15208152184883753,
|
|
"signal/accuracy_reward/group_std_mean": 0.20466437935829163,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.407407412926356,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07604076092441876,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07604076092441876,
|
|
"signal/advantage_abs_mean": 0.09747752547264099,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09747752547264099,
|
|
"signal/advantage_pre_scale_std": 0.1580593486626943,
|
|
"signal/advantage_std": 0.1580593486626943,
|
|
"signal/batch_coverage_0/centered_abs_mean": 0.14927902817726135,
|
|
"signal/batch_coverage_0/group_std_mean": 0.19230778018633524,
|
|
"signal/batch_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0018659877823665738,
|
|
"signal/batch_coverage_0/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_0/weighted_centered_abs_mean": 0.0018659877823665738,
|
|
"signal/batch_coverage_1/centered_abs_mean": 0.14927902817726135,
|
|
"signal/batch_coverage_1/group_std_mean": 0.19230778018633524,
|
|
"signal/batch_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0018659877823665738,
|
|
"signal/batch_coverage_1/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_1/weighted_centered_abs_mean": 0.0018659877823665738,
|
|
"signal/batch_coverage_5/centered_abs_mean": 0.1596171905597051,
|
|
"signal/batch_coverage_5/group_std_mean": 0.20558181405067444,
|
|
"signal/batch_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0019952148819963136,
|
|
"signal/batch_coverage_5/weight": 0.012500000186264515,
|
|
"signal/batch_coverage_5/weighted_centered_abs_mean": 0.0019952148819963136,
|
|
"signal/brier_reward/centered_abs_mean": 0.13661990563074747,
|
|
"signal/brier_reward/group_std_mean": 0.17795056601365408,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013661990873515606,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013661990873515606,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03251108837624391,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05413257206479708,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003251108961800734,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003251108961800734,
|
|
"signal/format_reward/centered_abs_mean": 0.0211407703657945,
|
|
"signal/format_reward/group_std_mean": 0.040644911428292595,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333333333334,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01057038518289725,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01057038518289725,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016895094110320012,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003310395792747537,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.111886957815538e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.111886957815538e-05,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14390182991822562,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18964583178361258,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017987728739778202,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017987728739778202,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06157498558362325,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07971930752197902,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007696873314368228,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007696873314368228,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0782752235730489,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10096191863218944,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009784403179461758,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009784403179461758,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11965686082839966,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15492077668507895,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00149571072931091,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00149571072931091,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01999407820403576,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.025594223911563557,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019994079678629837,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019994079678629837,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32452202836672467,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39687562982241315,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0324522058169047,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0324522058169047,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.013682471422586016,
|
|
"train_runtime": 44518.422,
|
|
"train_samples_per_second": 0.337,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 495185634,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|