9532 lines
600 KiB
JSON
9532 lines
600 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.6364776407113771,
|
|
"calibration/batch_distribution_entropy": 0.6455862671251419,
|
|
"calibration/confidence_entropy": 0.3430452682301779,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4929321511720823,
|
|
"calibration/mean_confidence": 0.7950546994723895,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0390625,
|
|
"completions/max_length": 1504.4,
|
|
"completions/max_terminated_length": 1504.4,
|
|
"completions/mean_length": 214.4884765625,
|
|
"completions/mean_terminated_length": 223.2016174316406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.04218404367566109,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0088,
|
|
"num_tokens": 17040394.0,
|
|
"reward": 0.7162060260772705,
|
|
"reward_std": 0.6058708906173706,
|
|
"rewards/accgated_coverage_0": 0.3019547939300537,
|
|
"rewards/accgated_coverage_1": 0.3019547939300537,
|
|
"rewards/accgated_coverage_10": 0.3019547939300537,
|
|
"rewards/accgated_coverage_15": 0.3019547939300537,
|
|
"rewards/accgated_coverage_20": 0.3019547939300537,
|
|
"rewards/accgated_coverage_25": 0.3019547939300537,
|
|
"rewards/accgated_coverage_5": 0.3019547939300537,
|
|
"rewards/accuracy_reward": 0.2205078125,
|
|
"rewards/brier_reward": 0.3744439840316772,
|
|
"rewards/confidence_uniqueness_reward": 0.4882001519203186,
|
|
"rewards/format_reward": 0.67841796875,
|
|
"rewards/frontier_aurc_reward": 0.3019547939300537,
|
|
"rewards/frontier_ece_reward": 0.3019547939300537,
|
|
"rewards/frontier_entropy_batch_reward": -0.6485954165458679,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.2956149399280548,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.3455429673194885,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.2956149399280548,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.3455429673194885,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.2956149399280548,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.3455429673194885,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.2956149399280548,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.3455429673194885,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.2956149399280548,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.3455429673194885,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.2956149399280548,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.3455429673194885,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.2956149399280548,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.3455429673194885,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.24259033203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.28361558318138125,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.121295166015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.121295166015625,
|
|
"signal/advantage_abs_mean": 0.5158906996250152,
|
|
"signal/advantage_pre_scale_abs_mean": 0.5158906996250152,
|
|
"signal/advantage_pre_scale_std": 0.6248098611831665,
|
|
"signal/advantage_std": 0.6248098611831665,
|
|
"signal/brier_reward/centered_abs_mean": 0.3227140247821808,
|
|
"signal/brier_reward/group_std_mean": 0.36719409823417665,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032271404191851615,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.032271404191851615,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.3028075397014618,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3515664279460907,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03028075359761715,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03028075359761715,
|
|
"signal/format_reward/centered_abs_mean": 0.407098388671875,
|
|
"signal/format_reward/group_std_mean": 0.4557113587856293,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2035491943359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.2035491943359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2956149399280548,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3455429673194885,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036951868794858457,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036951868794858457,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2956149399280548,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3455429673194885,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029561495035886766,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4288070142269135,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.47342650294303895,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04288070127367973,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04288070127367973,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6784755921026754,
|
|
"calibration/batch_distribution_entropy": 0.6632685785940718,
|
|
"calibration/confidence_entropy": 0.34600416614606927,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5261398694458579,
|
|
"calibration/mean_confidence": 0.7872892063716558,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03779296875,
|
|
"completions/max_length": 1490.4,
|
|
"completions/max_terminated_length": 1490.4,
|
|
"completions/mean_length": 204.46240234375,
|
|
"completions/mean_terminated_length": 212.52877502441407,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.8,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.053165026009082794,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0101,
|
|
"num_tokens": 34234441.0,
|
|
"reward": 0.7344912886619568,
|
|
"reward_std": 0.5754643559455872,
|
|
"rewards/accgated_coverage_0": 0.3019598960876465,
|
|
"rewards/accgated_coverage_1": 0.3019598960876465,
|
|
"rewards/accgated_coverage_10": 0.3019598960876465,
|
|
"rewards/accgated_coverage_15": 0.3019598960876465,
|
|
"rewards/accgated_coverage_20": 0.3019598960876465,
|
|
"rewards/accgated_coverage_25": 0.3019598960876465,
|
|
"rewards/accgated_coverage_5": 0.3019598960876465,
|
|
"rewards/accuracy_reward": 0.21142578125,
|
|
"rewards/brier_reward": 0.38320069313049315,
|
|
"rewards/confidence_uniqueness_reward": 0.5189769625663757,
|
|
"rewards/format_reward": 0.7234375,
|
|
"rewards/frontier_aurc_reward": 0.3019598960876465,
|
|
"rewards/frontier_ece_reward": 0.3019598960876465,
|
|
"rewards/frontier_entropy_batch_reward": -0.685005521774292,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.2811114966869354,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.33583817481994627,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.2811114966869354,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.33583817481994627,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.2811114966869354,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.33583817481994627,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.2811114966869354,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.33583817481994627,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.2811114966869354,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.33583817481994627,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.2811114966869354,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.33583817481994627,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.2811114966869354,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.33583817481994627,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.226324462890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.27484233379364015,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.30625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1131622314453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1131622314453125,
|
|
"signal/advantage_abs_mean": 0.4752094566822052,
|
|
"signal/advantage_pre_scale_abs_mean": 0.4752094566822052,
|
|
"signal/advantage_pre_scale_std": 0.5934478282928467,
|
|
"signal/advantage_std": 0.5934478282928467,
|
|
"signal/brier_reward/centered_abs_mean": 0.30669367909431455,
|
|
"signal/brier_reward/group_std_mean": 0.35490121245384215,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03066936805844307,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03066936805844307,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2765663981437683,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.33613392114639284,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027656640484929086,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027656640484929086,
|
|
"signal/format_reward/centered_abs_mean": 0.3680908203125,
|
|
"signal/format_reward/group_std_mean": 0.4318098545074463,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.18404541015625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.18404541015625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2811114966869354,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.33583817481994627,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003513893811032176,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003513893811032176,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2811114966869354,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.33583817481994627,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.028111150488257408,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4002421200275421,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4575047969818115,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04002421200275421,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04002421200275421,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5994437029504185,
|
|
"calibration/batch_distribution_entropy": 0.6537833231091288,
|
|
"calibration/buffer_distribution_entropy": 0.670862995600238,
|
|
"calibration/confidence_entropy": 0.35392652781636763,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.45825798508943094,
|
|
"calibration/mean_confidence": 0.7940574361042888,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0169921875,
|
|
"completions/max_length": 1496.6,
|
|
"completions/max_terminated_length": 1496.6,
|
|
"completions/mean_length": 177.0701171875,
|
|
"completions/mean_terminated_length": 180.23756408691406,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 8.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.00785356666892767,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0133,
|
|
"num_tokens": 51096375.0,
|
|
"reward": 0.8466886758804322,
|
|
"reward_std": 0.44576832354068757,
|
|
"rewards/accgated_coverage_0": 0.30328094847500325,
|
|
"rewards/accgated_coverage_1": 0.30328094847500325,
|
|
"rewards/accgated_coverage_10": 0.30328094847500325,
|
|
"rewards/accgated_coverage_15": 0.30328094847500325,
|
|
"rewards/accgated_coverage_20": 0.30328094847500325,
|
|
"rewards/accgated_coverage_25": 0.30328094847500325,
|
|
"rewards/accgated_coverage_5": 0.30328094847500325,
|
|
"rewards/accuracy_reward": 0.26787109375,
|
|
"rewards/brier_reward": 0.48309295177459716,
|
|
"rewards/confidence_uniqueness_reward": 0.6408498525619507,
|
|
"rewards/format_reward": 0.8763671875,
|
|
"rewards/frontier_aurc_reward": 0.30000464636832475,
|
|
"rewards/frontier_ece_reward": 0.2886372864246368,
|
|
"rewards/frontier_entropy_batch_reward": -0.8273520708084107,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.21752286404371263,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.2625477723777294,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.21752286404371263,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.2625477723777294,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.21752286404371263,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.2625477723777294,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.21752286404371263,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.2625477723777294,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.21752286404371263,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.2625477723777294,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.21752286404371263,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.2625477723777294,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.21752286404371263,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.2625477723777294,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.021752287307754158,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.195428466796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.24072909057140351,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0977142333984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0977142333984375,
|
|
"signal/advantage_abs_mean": 0.3535905122756958,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3535905122756958,
|
|
"signal/advantage_pre_scale_std": 0.466570183634758,
|
|
"signal/advantage_std": 0.466570183634758,
|
|
"signal/brier_reward/centered_abs_mean": 0.2721236228942871,
|
|
"signal/brier_reward/group_std_mean": 0.3262364029884338,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027212361991405486,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.027212361991405486,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20057708621025086,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2654131382703781,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020057709142565727,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020057709142565727,
|
|
"signal/format_reward/centered_abs_mean": 0.19754638671875,
|
|
"signal/format_reward/group_std_mean": 0.2995403289794922,
|
|
"signal/format_reward/group_zero_std_frac": 0.053125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.098773193359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.098773193359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.21606770902872086,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.2601821569725871,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0027008464734535665,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0027008464734535665,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.24298666417598724,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.29195126295089724,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024298667535185815,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024298667535185815,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26501129269599916,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37041118144989016,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026501129567623138,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026501129567623138,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5322075250234468,
|
|
"calibration/batch_distribution_entropy": 0.7415808503894759,
|
|
"calibration/buffer_distribution_entropy": 0.6724491622416343,
|
|
"calibration/confidence_entropy": 0.3819189591260301,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3454595472695586,
|
|
"calibration/mean_confidence": 0.7406530881585631,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0072265625,
|
|
"completions/max_length": 1334.6,
|
|
"completions/max_terminated_length": 1334.6,
|
|
"completions/mean_length": 134.69384765625,
|
|
"completions/mean_terminated_length": 135.69620513916016,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.031798213720321655,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0064,
|
|
"num_tokens": 67394040.0,
|
|
"reward": 0.7022075057029724,
|
|
"reward_std": 0.20362389087677002,
|
|
"rewards/accgated_coverage_0": 0.008948421757668256,
|
|
"rewards/accgated_coverage_1": 0.008948421757668256,
|
|
"rewards/accgated_coverage_10": 0.008948421757668256,
|
|
"rewards/accgated_coverage_15": 0.008948421757668256,
|
|
"rewards/accgated_coverage_20": 0.008948421757668256,
|
|
"rewards/accgated_coverage_25": 0.008948421757668256,
|
|
"rewards/accgated_coverage_5": 0.008948421757668256,
|
|
"rewards/accuracy_reward": 0.340625,
|
|
"rewards/brier_reward": 0.5920320749282837,
|
|
"rewards/confidence_uniqueness_reward": 0.7591738820075988,
|
|
"rewards/format_reward": 0.96904296875,
|
|
"rewards/frontier_aurc_reward": -0.006775558087974786,
|
|
"rewards/frontier_ece_reward": -0.043132821563631296,
|
|
"rewards/frontier_entropy_batch_reward": -0.8961299777030944,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.01699206493794918,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.02561163380742073,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.01699206493794918,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.02561163380742073,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.01699206493794918,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.02561163380742073,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.01699206493794918,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.02561163380742073,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.01699206493794918,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.02561163380742073,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.01699206493794918,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.02561163380742073,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.01699206493794918,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.02561163380742073,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0016992064891383051,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2025634765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2514846593141556,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.346875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10128173828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10128173828125,
|
|
"signal/advantage_abs_mean": 0.15547150671482085,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15547150671482085,
|
|
"signal/advantage_pre_scale_std": 0.2213201105594635,
|
|
"signal/advantage_std": 0.2213201105594635,
|
|
"signal/brier_reward/centered_abs_mean": 0.2514127492904663,
|
|
"signal/brier_reward/group_std_mean": 0.3071712851524353,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02514127641916275,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02514127641916275,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12233100682497025,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16284309923648835,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012233100831508636,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012233100831508636,
|
|
"signal/format_reward/centered_abs_mean": 0.054034423828125,
|
|
"signal/format_reward/group_std_mean": 0.11254389882087708,
|
|
"signal/format_reward/group_zero_std_frac": 0.5,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0270172119140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0270172119140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004748838301748037,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006619170308113098,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9360478917369616e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9360478917369616e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1384577602148056,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.16538217663764954,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013845776021480561,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013845776021480561,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17491987645626067,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2958513736724854,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.065625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017491987720131875,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017491987720131875,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6534105005979708,
|
|
"calibration/batch_distribution_entropy": 0.882906447603083,
|
|
"calibration/buffer_distribution_entropy": 0.7222234773610909,
|
|
"calibration/confidence_entropy": 0.4605512177454085,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3406275119390077,
|
|
"calibration/mean_confidence": 0.629555657034802,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0021484375,
|
|
"completions/max_length": 879.2,
|
|
"completions/max_terminated_length": 879.2,
|
|
"completions/mean_length": 110.02529296875,
|
|
"completions/mean_terminated_length": 110.26083374023438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 9.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.03735971078276634,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0037,
|
|
"num_tokens": 83453851.0,
|
|
"reward": 0.7374543428421021,
|
|
"reward_std": 0.17404116094112396,
|
|
"rewards/accgated_coverage_0": 0.010930617339909077,
|
|
"rewards/accgated_coverage_1": 0.010930617339909077,
|
|
"rewards/accgated_coverage_10": 0.010930617339909077,
|
|
"rewards/accgated_coverage_15": 0.010930617339909077,
|
|
"rewards/accgated_coverage_20": 0.010930617339909077,
|
|
"rewards/accgated_coverage_25": 0.010930617339909077,
|
|
"rewards/accgated_coverage_5": 0.010930617339909077,
|
|
"rewards/accuracy_reward": 0.34697265625,
|
|
"rewards/brier_reward": 0.6479137420654297,
|
|
"rewards/confidence_uniqueness_reward": 0.8465597629547119,
|
|
"rewards/format_reward": 0.9888671875,
|
|
"rewards/frontier_aurc_reward": -0.006063922494649887,
|
|
"rewards/frontier_ece_reward": -0.035230358317494395,
|
|
"rewards/frontier_entropy_batch_reward": -0.8396552681922913,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.025153553858399392,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.035369380936026575,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.025153553858399392,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.035369380936026575,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.025153553858399392,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.035369380936026575,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.025153553858399392,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.035369380936026575,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.025153553858399392,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.035369380936026575,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.025153553858399392,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.035369380936026575,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.025153553858399392,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.035369380936026575,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.002515355497598648,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.193377685546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2430298238992691,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0966888427734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0966888427734375,
|
|
"signal/advantage_abs_mean": 0.13480161428451537,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13480161428451537,
|
|
"signal/advantage_pre_scale_std": 0.1890869230031967,
|
|
"signal/advantage_std": 0.1890869230031967,
|
|
"signal/brier_reward/centered_abs_mean": 0.24160752594470977,
|
|
"signal/brier_reward/group_std_mean": 0.29405343532562256,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024160753190517425,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024160753190517425,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07963932007551193,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11142729669809341,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007963932119309902,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007963932119309902,
|
|
"signal/format_reward/centered_abs_mean": 0.0205322265625,
|
|
"signal/format_reward/group_std_mean": 0.04784815683960915,
|
|
"signal/format_reward/group_zero_std_frac": 0.765625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01026611328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01026611328125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003672349965199828,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005370978266000748,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.590437456499785e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.590437456499785e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.11837852597236634,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.14812108874320984,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01183785293251276,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01183785293251276,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2632958233356476,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4028609037399292,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026329582929611205,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026329582929611205,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6480277663972374,
|
|
"calibration/batch_distribution_entropy": 0.9591338539033302,
|
|
"calibration/buffer_distribution_entropy": 0.7915711200792763,
|
|
"calibration/confidence_entropy": 0.5127234109549083,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.2450127953620774,
|
|
"calibration/mean_confidence": 0.4834098702409223,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0021484375,
|
|
"completions/max_length": 1057.2,
|
|
"completions/max_terminated_length": 1057.2,
|
|
"completions/mean_length": 107.9982421875,
|
|
"completions/mean_terminated_length": 108.23075714111329,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 14.8,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.018712179735302925,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0025,
|
|
"num_tokens": 99604361.0,
|
|
"reward": 0.7748218059539795,
|
|
"reward_std": 0.16123634278774263,
|
|
"rewards/accgated_coverage_0": 0.01546512171626091,
|
|
"rewards/accgated_coverage_1": 0.01546512171626091,
|
|
"rewards/accgated_coverage_10": 0.01546512171626091,
|
|
"rewards/accgated_coverage_15": 0.01546512171626091,
|
|
"rewards/accgated_coverage_20": 0.01546512171626091,
|
|
"rewards/accgated_coverage_25": 0.01546512171626091,
|
|
"rewards/accgated_coverage_5": 0.01546512171626091,
|
|
"rewards/accuracy_reward": 0.3490234375,
|
|
"rewards/brier_reward": 0.7032750844955444,
|
|
"rewards/confidence_uniqueness_reward": 0.9063532948493958,
|
|
"rewards/format_reward": 0.99130859375,
|
|
"rewards/frontier_aurc_reward": -0.005372885894030332,
|
|
"rewards/frontier_ece_reward": -0.020505653135478495,
|
|
"rewards/frontier_entropy_batch_reward": -0.650149130821228,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.033817265182733536,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.04317799136042595,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.033817265182733536,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.04317799136042595,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.033817265182733536,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.04317799136042595,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.033817265182733536,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.04317799136042595,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.033817265182733536,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.04317799136042595,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.033817265182733536,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.04317799136042595,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.033817265182733536,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.04317799136042595,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0033817265182733538,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18580322265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2367357134819031,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.092901611328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.092901611328125,
|
|
"signal/advantage_abs_mean": 0.12383366525173187,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12383366525173187,
|
|
"signal/advantage_pre_scale_std": 0.17370418608188629,
|
|
"signal/advantage_std": 0.17370418608188629,
|
|
"signal/brier_reward/centered_abs_mean": 0.2232639193534851,
|
|
"signal/brier_reward/group_std_mean": 0.27452114820480344,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0223263930529356,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0223263930529356,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05847673192620277,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08610113561153412,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005847673676908016,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005847673676908016,
|
|
"signal/format_reward/centered_abs_mean": 0.015130615234375,
|
|
"signal/format_reward/group_std_mean": 0.03428548686206341,
|
|
"signal/format_reward/group_zero_std_frac": 0.834375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0075653076171875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0075653076171875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002610748796723783,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004314846731722355,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2634363742545246e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2634363742545246e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0914211854338646,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.12666151225566863,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009142118506133556,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009142118506133556,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4424427688121796,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5312122881412507,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04424427673220634,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04424427673220634,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5095629672072505,
|
|
"calibration/batch_distribution_entropy": 0.9386988710165081,
|
|
"calibration/buffer_distribution_entropy": 0.8634836316046473,
|
|
"calibration/confidence_entropy": 0.5009593051567744,
|
|
"calibration/coverage@0%": 0.001968503937007874,
|
|
"calibration/coverage@1%": 0.001968503937007874,
|
|
"calibration/coverage@10%": 0.001968503937007874,
|
|
"calibration/coverage@15%": 0.001968503937007874,
|
|
"calibration/coverage@20%": 0.001968503937007874,
|
|
"calibration/coverage@25%": 0.0157728804609483,
|
|
"calibration/coverage@30%": 0.037065337247045305,
|
|
"calibration/coverage@5%": 0.001968503937007874,
|
|
"calibration/ece": 0.18292966624226104,
|
|
"calibration/mean_confidence": 0.37759446560343035,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0064453125,
|
|
"completions/max_length": 1335.8,
|
|
"completions/max_terminated_length": 1335.8,
|
|
"completions/mean_length": 113.25283203125,
|
|
"completions/mean_terminated_length": 113.99161376953126,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.05354702100157738,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0069,
|
|
"num_tokens": 115873542.0,
|
|
"reward": 0.8208463191986084,
|
|
"reward_std": 0.15327103734016417,
|
|
"rewards/accgated_coverage_0": 0.015279607055708766,
|
|
"rewards/accgated_coverage_1": 0.015279607055708766,
|
|
"rewards/accgated_coverage_10": 0.015279607055708766,
|
|
"rewards/accgated_coverage_15": 0.015279607055708766,
|
|
"rewards/accgated_coverage_20": 0.015279607055708766,
|
|
"rewards/accgated_coverage_25": 0.015279607055708766,
|
|
"rewards/accgated_coverage_5": 0.015279607055708766,
|
|
"rewards/accuracy_reward": 0.38798828125,
|
|
"rewards/brier_reward": 0.7118686318397522,
|
|
"rewards/confidence_uniqueness_reward": 0.9270346283912658,
|
|
"rewards/format_reward": 0.985546875,
|
|
"rewards/frontier_aurc_reward": -0.004612564016133547,
|
|
"rewards/frontier_ece_reward": -0.0035013286280445753,
|
|
"rewards/frontier_entropy_batch_reward": -0.40099533796310427,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05104095339775085,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06317490637302399,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05104095339775085,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06317490637302399,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05104095339775085,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06317490637302399,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05104095339775085,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06317490637302399,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.05104095339775085,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06317490637302399,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05104095339775085,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06317490637302399,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05104095339775085,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06317490637302399,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005104095302522182,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.186871337890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2370339572429657,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0934356689453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0934356689453125,
|
|
"signal/advantage_abs_mean": 0.11395874172449112,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11395874172449112,
|
|
"signal/advantage_pre_scale_std": 0.1690732568502426,
|
|
"signal/advantage_std": 0.1690732568502426,
|
|
"signal/brier_reward/centered_abs_mean": 0.21671865582466127,
|
|
"signal/brier_reward/group_std_mean": 0.26835680603981016,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02167186588048935,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02167186588048935,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04837794005870819,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08344578742980957,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004837794043123722,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004837794043123722,
|
|
"signal/format_reward/centered_abs_mean": 0.026171875,
|
|
"signal/format_reward/group_std_mean": 0.05847979113459587,
|
|
"signal/format_reward/group_zero_std_frac": 0.721875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0130859375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0130859375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00165214529260993,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002674648817628622,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0651815793826246e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0651815793826246e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05707942098379135,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08767256736755372,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005707942321896553,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005707942321896553,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4562400221824646,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5098948240280151,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04562400206923485,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04562400206923485,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5786534736820717,
|
|
"calibration/batch_distribution_entropy": 0.9021460403906527,
|
|
"calibration/buffer_distribution_entropy": 0.9127727634448434,
|
|
"calibration/confidence_entropy": 0.4836999684154605,
|
|
"calibration/coverage@0%": 0.001987385490975714,
|
|
"calibration/coverage@1%": 0.001987385490975714,
|
|
"calibration/coverage@10%": 0.001987385490975714,
|
|
"calibration/coverage@15%": 0.003975457061552255,
|
|
"calibration/coverage@20%": 0.003975457061552255,
|
|
"calibration/coverage@25%": 0.004373071375667563,
|
|
"calibration/coverage@30%": 0.009471110591353837,
|
|
"calibration/coverage@5%": 0.001987385490975714,
|
|
"calibration/ece": 0.1779325393409029,
|
|
"calibration/mean_confidence": 0.3209674774972401,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0078125,
|
|
"completions/max_length": 1260.2,
|
|
"completions/max_terminated_length": 1260.2,
|
|
"completions/mean_length": 120.1251953125,
|
|
"completions/mean_terminated_length": 121.06528930664062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 28.6,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.045759644359350204,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0088,
|
|
"num_tokens": 132020296.0,
|
|
"reward": 0.8203153371810913,
|
|
"reward_std": 0.13453607708215715,
|
|
"rewards/accgated_coverage_0": 0.017799985222518445,
|
|
"rewards/accgated_coverage_1": 0.017799985222518445,
|
|
"rewards/accgated_coverage_10": 0.017799985222518445,
|
|
"rewards/accgated_coverage_15": 0.017799985222518445,
|
|
"rewards/accgated_coverage_20": 0.017799985222518445,
|
|
"rewards/accgated_coverage_25": 0.017799985222518445,
|
|
"rewards/accgated_coverage_5": 0.017799985222518445,
|
|
"rewards/accuracy_reward": 0.37900390625,
|
|
"rewards/brier_reward": 0.7211938977241517,
|
|
"rewards/confidence_uniqueness_reward": 0.9303524374961853,
|
|
"rewards/format_reward": 0.9890625,
|
|
"rewards/frontier_aurc_reward": -0.004417700413614512,
|
|
"rewards/frontier_ece_reward": 0.00374011246021837,
|
|
"rewards/frontier_entropy_batch_reward": -0.4165126860141754,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04929931238293648,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.060933683067560196,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04929931238293648,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.060933683067560196,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04929931238293648,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.060933683067560196,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04929931238293648,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.060933683067560196,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04929931238293648,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.060933683067560196,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04929931238293648,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.060933683067560196,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04929931238293648,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.060933683067560196,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004929931275546551,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.173748779296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.22208267450332642,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.390625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0868743896484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0868743896484375,
|
|
"signal/advantage_abs_mean": 0.101476289331913,
|
|
"signal/advantage_pre_scale_abs_mean": 0.101476289331913,
|
|
"signal/advantage_pre_scale_std": 0.15157161951065062,
|
|
"signal/advantage_std": 0.15157161951065062,
|
|
"signal/brier_reward/centered_abs_mean": 0.20460852086544037,
|
|
"signal/brier_reward/group_std_mean": 0.25686987340450285,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02046085223555565,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02046085223555565,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0362715695053339,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06078647375106812,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003627156838774681,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003627156838774681,
|
|
"signal/format_reward/centered_abs_mean": 0.01942138671875,
|
|
"signal/format_reward/group_std_mean": 0.041324655339121816,
|
|
"signal/format_reward/group_zero_std_frac": 0.809375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009710693359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009710693359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014763909159228206,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002241973439231515,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.845488623075653e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.845488623075653e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.046713653951883316,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07055558562278748,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004671365395188332,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004671365395188332,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42762730121612547,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4882843255996704,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04276273101568222,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04276273101568222,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.421006288092185,
|
|
"calibration/batch_distribution_entropy": 0.9715952995597832,
|
|
"calibration/buffer_distribution_entropy": 0.9399097549752383,
|
|
"calibration/confidence_entropy": 0.5176721474972316,
|
|
"calibration/coverage@0%": 0.0011928429423459243,
|
|
"calibration/coverage@1%": 0.0011928429423459243,
|
|
"calibration/coverage@10%": 0.0011928429423459243,
|
|
"calibration/coverage@15%": 0.0011928429423459243,
|
|
"calibration/coverage@20%": 0.0031888509263778607,
|
|
"calibration/coverage@25%": 0.11491847319277945,
|
|
"calibration/coverage@30%": 0.18768189267588084,
|
|
"calibration/coverage@5%": 0.0011928429423459243,
|
|
"calibration/ece": 0.22219560625757112,
|
|
"calibration/mean_confidence": 0.42302083297278054,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0109375,
|
|
"completions/max_length": 1318.2,
|
|
"completions/max_terminated_length": 1318.2,
|
|
"completions/mean_length": 122.58076171875,
|
|
"completions/mean_terminated_length": 123.93398590087891,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 5.2,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.03618094325065613,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0104,
|
|
"num_tokens": 148225955.0,
|
|
"reward": 0.8666697382926941,
|
|
"reward_std": 0.15474329888820648,
|
|
"rewards/accgated_coverage_0": 0.004803288914263249,
|
|
"rewards/accgated_coverage_1": 0.004803288914263249,
|
|
"rewards/accgated_coverage_10": 0.004803288914263249,
|
|
"rewards/accgated_coverage_15": 0.004803288914263249,
|
|
"rewards/accgated_coverage_20": 0.004803288914263249,
|
|
"rewards/accgated_coverage_25": 0.004803288914263249,
|
|
"rewards/accgated_coverage_5": 0.004803288914263249,
|
|
"rewards/accuracy_reward": 0.46796875,
|
|
"rewards/brier_reward": 0.6943991541862488,
|
|
"rewards/confidence_uniqueness_reward": 0.9354530334472656,
|
|
"rewards/format_reward": 0.98427734375,
|
|
"rewards/frontier_aurc_reward": -0.004057955369353295,
|
|
"rewards/frontier_ece_reward": 0.0027345028007403015,
|
|
"rewards/frontier_entropy_batch_reward": -0.2602355360984802,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.0669349491596222,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.08391269594430924,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.0669349491596222,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.08391269594430924,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.0669349491596222,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.08391269594430924,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0669349491596222,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.08391269594430924,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.0669349491596222,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.08391269594430924,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.0669349491596222,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.08391269594430924,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.0669349491596222,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.08391269594430924,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006693494878709317,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17861328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2309749722480774,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.089306640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.089306640625,
|
|
"signal/advantage_abs_mean": 0.11762301176786423,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11762301176786423,
|
|
"signal/advantage_pre_scale_std": 0.16891648769378662,
|
|
"signal/advantage_std": 0.16891648769378662,
|
|
"signal/brier_reward/centered_abs_mean": 0.21884905993938447,
|
|
"signal/brier_reward/group_std_mean": 0.2688636898994446,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02188490703701973,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02188490703701973,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038263066112995146,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06704763397574424,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038263065740466117,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038263065740466117,
|
|
"signal/format_reward/centered_abs_mean": 0.027459716796875,
|
|
"signal/format_reward/group_std_mean": 0.05488141924142838,
|
|
"signal/format_reward/group_zero_std_frac": 0.759375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0137298583984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0137298583984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020087390905246137,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029508148785680533,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5109239504672588e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5109239504672588e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.052871844917535785,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07742156088352203,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005287184566259384,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005287184566259384,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3467997610569,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.420889800786972,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034679976850748064,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034679976850748064,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.49606457835475953,
|
|
"calibration/batch_distribution_entropy": 0.9855216279436447,
|
|
"calibration/buffer_distribution_entropy": 0.9546393763284033,
|
|
"calibration/confidence_entropy": 0.5309623955093902,
|
|
"calibration/coverage@0%": 0.002379660992144125,
|
|
"calibration/coverage@1%": 0.002379660992144125,
|
|
"calibration/coverage@10%": 0.002379660992144125,
|
|
"calibration/coverage@15%": 0.00553547953257805,
|
|
"calibration/coverage@20%": 0.00553547953257805,
|
|
"calibration/coverage@25%": 0.018316860835618055,
|
|
"calibration/coverage@30%": 0.01950661707393295,
|
|
"calibration/coverage@5%": 0.002379660992144125,
|
|
"calibration/ece": 0.17130343214695962,
|
|
"calibration/mean_confidence": 0.5091154509440758,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00654296875,
|
|
"completions/max_length": 1147.4,
|
|
"completions/max_terminated_length": 1147.4,
|
|
"completions/mean_length": 129.88291015625,
|
|
"completions/mean_terminated_length": 130.73819885253906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 11.4,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.01775994896888733,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0066,
|
|
"num_tokens": 164576884.0,
|
|
"reward": 0.8649451255798339,
|
|
"reward_std": 0.15237079560756683,
|
|
"rewards/accgated_coverage_0": 0.013734531402587891,
|
|
"rewards/accgated_coverage_1": 0.013734531402587891,
|
|
"rewards/accgated_coverage_10": 0.013734531402587891,
|
|
"rewards/accgated_coverage_15": 0.013734531402587891,
|
|
"rewards/accgated_coverage_20": 0.013734531402587891,
|
|
"rewards/accgated_coverage_25": 0.013734531402587891,
|
|
"rewards/accgated_coverage_5": 0.013734531402587891,
|
|
"rewards/accuracy_reward": 0.42509765625,
|
|
"rewards/brier_reward": 0.7105550646781922,
|
|
"rewards/confidence_uniqueness_reward": 0.9447488427162171,
|
|
"rewards/format_reward": 0.98974609375,
|
|
"rewards/frontier_aurc_reward": -0.004470669943839311,
|
|
"rewards/frontier_ece_reward": 0.0029585707816295326,
|
|
"rewards/frontier_entropy_batch_reward": -0.17861297130584716,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04411946162581444,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.056043070554733274,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04411946162581444,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.056043070554733274,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04411946162581444,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.056043070554733274,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04411946162581444,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.056043070554733274,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04411946162581444,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.056043070554733274,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04411946162581444,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.056043070554733274,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04411946162581444,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.056043070554733274,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004411946143954992,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.170623779296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.21849047839641572,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0853118896484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0853118896484375,
|
|
"signal/advantage_abs_mean": 0.1180063620209694,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1180063620209694,
|
|
"signal/advantage_pre_scale_std": 0.1680304616689682,
|
|
"signal/advantage_std": 0.1680304616689682,
|
|
"signal/brier_reward/centered_abs_mean": 0.2101664960384369,
|
|
"signal/brier_reward/group_std_mean": 0.2581899106502533,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02101665027439594,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02101665027439594,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027367017790675165,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04632028862833977,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027367019560188056,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027367019560188056,
|
|
"signal/format_reward/centered_abs_mean": 0.017938232421875,
|
|
"signal/format_reward/group_std_mean": 0.035063137859106065,
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0089691162109375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0089691162109375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027033270802348853,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003944651270285249,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.379158952157013e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.379158952157013e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06154962629079819,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08616945594549179,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0061549627222120765,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0061549627222120765,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2729690343141556,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35792847275733947,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02729690447449684,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02729690447449684,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.6405628666985336,
|
|
"eval_calibration/batch_distribution_entropy": 0.9033017844710131,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9602131464411764,
|
|
"eval_calibration/confidence_entropy": 0.5389164854891668,
|
|
"eval_calibration/coverage@0%": 0.03125,
|
|
"eval_calibration/coverage@1%": 0.03125,
|
|
"eval_calibration/coverage@10%": 0.03125,
|
|
"eval_calibration/coverage@15%": 0.03125,
|
|
"eval_calibration/coverage@20%": 0.0390625,
|
|
"eval_calibration/coverage@25%": 0.0703125,
|
|
"eval_calibration/coverage@30%": 0.0859375,
|
|
"eval_calibration/coverage@5%": 0.03125,
|
|
"eval_calibration/ece": 0.3262753593432853,
|
|
"eval_calibration/mean_confidence": 0.5574982158874487,
|
|
"eval_completions/clipped_ratio": 0.001953125,
|
|
"eval_completions/max_length": 405.25,
|
|
"eval_completions/max_terminated_length": 405.25,
|
|
"eval_completions/mean_length": 138.56182479858398,
|
|
"eval_completions/mean_terminated_length": 138.84318161010742,
|
|
"eval_completions/min_length": 51.0,
|
|
"eval_completions/min_terminated_length": 66.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 164576884.0,
|
|
"eval_reward": 0.7559798359870911,
|
|
"eval_reward_std": 0.23514112457633018,
|
|
"eval_rewards/accgated_coverage_0": 0.013314789393916726,
|
|
"eval_rewards/accgated_coverage_1": 0.013314789393916726,
|
|
"eval_rewards/accgated_coverage_10": 0.013314789393916726,
|
|
"eval_rewards/accgated_coverage_15": 0.013314789393916726,
|
|
"eval_rewards/accgated_coverage_20": 0.013314789393916726,
|
|
"eval_rewards/accgated_coverage_25": 0.013314789393916726,
|
|
"eval_rewards/accgated_coverage_5": 0.013314789393916726,
|
|
"eval_rewards/accuracy_reward": 0.376953125,
|
|
"eval_rewards/brier_reward": 0.6998499184846878,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8947123885154724,
|
|
"eval_rewards/format_reward": 0.998046875,
|
|
"eval_rewards/frontier_aurc_reward": -0.0051660287426784635,
|
|
"eval_rewards/frontier_ece_reward": -0.004275021725334227,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.998046875,
|
|
"eval_runtime": 28.9215,
|
|
"eval_samples_per_second": 17.288,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.03896623570472002,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.052276285365223885,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.03896623570472002,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.052276285365223885,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.03896623570472002,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.052276285365223885,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.03896623570472002,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.052276285365223885,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.03896623570472002,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.052276285365223885,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.03896623570472002,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.052276285365223885,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.03896623570472002,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.052276285365223885,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00389662355883047,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4581298828125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.48544733971357346,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22906494140625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22906494140625,
|
|
"eval_signal/advantage_abs_mean": 0.21391661465168,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21391661465168,
|
|
"eval_signal/advantage_pre_scale_std": 0.23284973949193954,
|
|
"eval_signal/advantage_std": 0.23284973949193954,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2159881703555584,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2689853310585022,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0215988177806139,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0215988177806139,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04416041262447834,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05725146550685167,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004416041250806302,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004416041250806302,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0039943401352502406,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005418717511929572,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.992925369151635e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.992925369151635e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.06499312072992325,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0963602364063263,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006499312003143132,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006499312003143132,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003784179862122983,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003784179862122983,
|
|
"eval_steps_per_second": 0.138,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4754503249339259,
|
|
"calibration/batch_distribution_entropy": 0.9860094280106372,
|
|
"calibration/buffer_distribution_entropy": 0.9636066838694577,
|
|
"calibration/confidence_entropy": 0.5166222641614133,
|
|
"calibration/coverage@0%": 0.003930101199956869,
|
|
"calibration/coverage@1%": 0.003930101199956869,
|
|
"calibration/coverage@10%": 0.003930101199956869,
|
|
"calibration/coverage@15%": 0.003930101199956869,
|
|
"calibration/coverage@20%": 0.004715955816852743,
|
|
"calibration/coverage@25%": 0.01453847381957539,
|
|
"calibration/coverage@30%": 0.015719576181780114,
|
|
"calibration/coverage@5%": 0.003930101199956869,
|
|
"calibration/ece": 0.19332276668528464,
|
|
"calibration/mean_confidence": 0.5447290779242254,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00283203125,
|
|
"completions/max_length": 956.6,
|
|
"completions/max_terminated_length": 956.6,
|
|
"completions/mean_length": 142.855859375,
|
|
"completions/mean_terminated_length": 143.2583770751953,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 51.2,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.026265909895300865,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0021,
|
|
"num_tokens": 181276848.0,
|
|
"reward": 0.8707459092140197,
|
|
"reward_std": 0.1425304591655731,
|
|
"rewards/accgated_coverage_0": 0.013036295026540756,
|
|
"rewards/accgated_coverage_1": 0.013036295026540756,
|
|
"rewards/accgated_coverage_10": 0.013036295026540756,
|
|
"rewards/accgated_coverage_15": 0.013036295026540756,
|
|
"rewards/accgated_coverage_20": 0.013036295026540756,
|
|
"rewards/accgated_coverage_25": 0.013036295026540756,
|
|
"rewards/accgated_coverage_5": 0.013036295026540756,
|
|
"rewards/accuracy_reward": 0.4294921875,
|
|
"rewards/brier_reward": 0.7150080680847168,
|
|
"rewards/confidence_uniqueness_reward": 0.9512990832328796,
|
|
"rewards/format_reward": 0.9962890625,
|
|
"rewards/frontier_aurc_reward": -0.004525785241276026,
|
|
"rewards/frontier_ece_reward": 0.0045379682444036005,
|
|
"rewards/frontier_entropy_batch_reward": -0.18298066556453704,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04418762475252151,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05718696340918541,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04418762475252151,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05718696340918541,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04418762475252151,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05718696340918541,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04418762475252151,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.05718696340918541,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04418762475252151,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.05718696340918541,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04418762475252151,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.05718696340918541,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04418762475252151,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05718696340918541,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004418762493878603,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.161376953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.20732997953891755,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0806884765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0806884765625,
|
|
"signal/advantage_abs_mean": 0.11180974841117859,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11180974841117859,
|
|
"signal/advantage_pre_scale_std": 0.15546224117279053,
|
|
"signal/advantage_std": 0.15546224117279053,
|
|
"signal/brier_reward/centered_abs_mean": 0.20797090530395507,
|
|
"signal/brier_reward/group_std_mean": 0.25753060579299925,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020797090604901314,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020797090604901314,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017350507155060767,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030293600633740425,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017350507900118829,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017350507900118829,
|
|
"signal/format_reward/centered_abs_mean": 0.00703125,
|
|
"signal/format_reward/group_std_mean": 0.017755493894219397,
|
|
"signal/format_reward/group_zero_std_frac": 0.909375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003515625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.003515625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029423195403069256,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004191439598798752,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6778994399355724e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6778994399355724e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.062749931961298,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08544526249170303,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006274993345141411,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006274993345141411,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2763451874256134,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35692101120948794,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02763451896607876,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02763451896607876,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.40837566973095046,
|
|
"calibration/batch_distribution_entropy": 0.9885787564340249,
|
|
"calibration/buffer_distribution_entropy": 0.9697773388363558,
|
|
"calibration/confidence_entropy": 0.48658811017166015,
|
|
"calibration/coverage@0%": 0.002740502450980392,
|
|
"calibration/coverage@1%": 0.002740502450980392,
|
|
"calibration/coverage@10%": 0.006669775535459763,
|
|
"calibration/coverage@15%": 0.008622900535459763,
|
|
"calibration/coverage@20%": 0.025432818940444545,
|
|
"calibration/coverage@25%": 0.047359947417080786,
|
|
"calibration/coverage@30%": 0.17184148862148002,
|
|
"calibration/coverage@5%": 0.002740502450980392,
|
|
"calibration/ece": 0.15979824998045,
|
|
"calibration/mean_confidence": 0.4947491103908284,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0021484375,
|
|
"completions/max_length": 1097.0,
|
|
"completions/max_terminated_length": 1097.0,
|
|
"completions/mean_length": 153.2072265625,
|
|
"completions/mean_terminated_length": 153.53602600097656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 38.6,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.06689594686031342,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0015,
|
|
"num_tokens": 197660506.0,
|
|
"reward": 0.8970973372459412,
|
|
"reward_std": 0.13292600810527802,
|
|
"rewards/accgated_coverage_0": 0.016102191619575025,
|
|
"rewards/accgated_coverage_1": 0.016102191619575025,
|
|
"rewards/accgated_coverage_10": 0.016102191619575025,
|
|
"rewards/accgated_coverage_15": 0.016102191619575025,
|
|
"rewards/accgated_coverage_20": 0.016102191619575025,
|
|
"rewards/accgated_coverage_25": 0.016102191619575025,
|
|
"rewards/accgated_coverage_5": 0.016102191619575025,
|
|
"rewards/accuracy_reward": 0.46796875,
|
|
"rewards/brier_reward": 0.726796281337738,
|
|
"rewards/confidence_uniqueness_reward": 0.9531630277633667,
|
|
"rewards/format_reward": 0.99716796875,
|
|
"rewards/frontier_aurc_reward": -0.00395333026535809,
|
|
"rewards/frontier_ece_reward": 0.012035092897713184,
|
|
"rewards/frontier_entropy_batch_reward": -0.15892549753189086,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05685779377818108,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.07282028794288635,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05685779377818108,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.07282028794288635,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05685779377818108,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.07282028794288635,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05685779377818108,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.07282028794288635,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.05685779377818108,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.07282028794288635,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05685779377818108,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07282028794288635,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05685779377818108,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.07282028794288635,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005685779452323914,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1550048828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.20295966863632203,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.428125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07750244140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07750244140625,
|
|
"signal/advantage_abs_mean": 0.10285976082086563,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10285976082086563,
|
|
"signal/advantage_pre_scale_std": 0.1445027083158493,
|
|
"signal/advantage_std": 0.1445027083158493,
|
|
"signal/brier_reward/centered_abs_mean": 0.21229986250400543,
|
|
"signal/brier_reward/group_std_mean": 0.2616199791431427,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122998610138893,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02122998610138893,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01506846398115158,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.025943630561232566,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015068464446812869,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015068464446812869,
|
|
"signal/format_reward/centered_abs_mean": 0.005401611328125,
|
|
"signal/format_reward/group_std_mean": 0.01394332442432642,
|
|
"signal/format_reward/group_zero_std_frac": 0.928125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027008056640625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0027008056640625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002814545203000307,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004039418138563633,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.51818154740613e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.51818154740613e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05990893542766571,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08055989742279053,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005990893673151731,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005990893673151731,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24653230607509613,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3289069652557373,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024653231725096703,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024653231725096703,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34297082237054877,
|
|
"calibration/batch_distribution_entropy": 0.98456109382964,
|
|
"calibration/buffer_distribution_entropy": 0.9742222514195511,
|
|
"calibration/confidence_entropy": 0.4718204105868005,
|
|
"calibration/coverage@0%": 0.00703125,
|
|
"calibration/coverage@1%": 0.00703125,
|
|
"calibration/coverage@10%": 0.07265625,
|
|
"calibration/coverage@15%": 0.201953125,
|
|
"calibration/coverage@20%": 0.29921875,
|
|
"calibration/coverage@25%": 0.384375,
|
|
"calibration/coverage@30%": 0.484765625,
|
|
"calibration/coverage@5%": 0.00703125,
|
|
"calibration/ece": 0.20060203447078181,
|
|
"calibration/mean_confidence": 0.49395130888201433,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0013671875,
|
|
"completions/max_length": 634.4,
|
|
"completions/max_terminated_length": 634.4,
|
|
"completions/mean_length": 161.644921875,
|
|
"completions/mean_terminated_length": 161.86683044433593,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 52.6,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.03316055238246918,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0013,
|
|
"num_tokens": 214347974.0,
|
|
"reward": 0.9171293497085571,
|
|
"reward_std": 0.1312678873538971,
|
|
"rewards/accgated_coverage_0": 0.01277830610051751,
|
|
"rewards/accgated_coverage_1": 0.01277830610051751,
|
|
"rewards/accgated_coverage_10": 0.01277830610051751,
|
|
"rewards/accgated_coverage_15": 0.01277830610051751,
|
|
"rewards/accgated_coverage_20": 0.01277830610051751,
|
|
"rewards/accgated_coverage_25": 0.01277830610051751,
|
|
"rewards/accgated_coverage_5": 0.01277830610051751,
|
|
"rewards/accuracy_reward": 0.51337890625,
|
|
"rewards/brier_reward": 0.7331403970718384,
|
|
"rewards/confidence_uniqueness_reward": 0.953273355960846,
|
|
"rewards/format_reward": 0.9982421875,
|
|
"rewards/frontier_aurc_reward": -0.0034087498672306536,
|
|
"rewards/frontier_ece_reward": 0.016286897659301757,
|
|
"rewards/frontier_entropy_batch_reward": -0.17853465378284455,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07022299095988274,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.08959027081727981,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07022299095988274,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.08959027081727981,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07022299095988274,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.08959027081727981,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.07022299095988274,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.08959027081727981,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.07022299095988274,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.08959027081727981,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.07022299095988274,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.08959027081727981,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07022299095988274,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.08959027081727981,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007022299244999886,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.148431396484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1960848778486252,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0742156982421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0742156982421875,
|
|
"signal/advantage_abs_mean": 0.10217523276805877,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10217523276805877,
|
|
"signal/advantage_pre_scale_std": 0.14075265526771547,
|
|
"signal/advantage_std": 0.14075265526771547,
|
|
"signal/brier_reward/centered_abs_mean": 0.21132160127162933,
|
|
"signal/brier_reward/group_std_mean": 0.26202887296676636,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021132160723209382,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021132160723209382,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014979423955082893,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.023213838413357734,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014979424653574825,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014979424653574825,
|
|
"signal/format_reward/centered_abs_mean": 0.0033447265625,
|
|
"signal/format_reward/group_std_mean": 0.008539242530241608,
|
|
"signal/format_reward/group_zero_std_frac": 0.95625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00167236328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00167236328125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002694142144173384,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0039808189030736685,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.36767770932056e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.36767770932056e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.055422401428222655,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0753881111741066,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005542240105569363,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005542240105569363,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26786054074764254,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34874598383903505,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02678605616092682,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02678605616092682,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35186780951983854,
|
|
"calibration/batch_distribution_entropy": 0.9845853007050364,
|
|
"calibration/buffer_distribution_entropy": 0.9778449012308194,
|
|
"calibration/confidence_entropy": 0.464560058342187,
|
|
"calibration/coverage@0%": 0.003125764432485323,
|
|
"calibration/coverage@1%": 0.003125764432485323,
|
|
"calibration/coverage@10%": 0.003125764432485323,
|
|
"calibration/coverage@15%": 0.02970661081213307,
|
|
"calibration/coverage@20%": 0.18998593444227005,
|
|
"calibration/coverage@25%": 0.3162434258806262,
|
|
"calibration/coverage@30%": 0.4678510273972603,
|
|
"calibration/coverage@5%": 0.003125764432485323,
|
|
"calibration/ece": 0.16684519307308002,
|
|
"calibration/mean_confidence": 0.46676442179175454,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 1037.8,
|
|
"completions/max_terminated_length": 1037.8,
|
|
"completions/mean_length": 166.5966796875,
|
|
"completions/mean_terminated_length": 166.72836303710938,
|
|
"completions/min_length": 11.4,
|
|
"completions/min_terminated_length": 61.4,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.019673509523272514,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 231207108.0,
|
|
"reward": 0.8996399760246276,
|
|
"reward_std": 0.12111974656581878,
|
|
"rewards/accgated_coverage_0": 0.02115403041243553,
|
|
"rewards/accgated_coverage_1": 0.02115403041243553,
|
|
"rewards/accgated_coverage_10": 0.02115403041243553,
|
|
"rewards/accgated_coverage_15": 0.02115403041243553,
|
|
"rewards/accgated_coverage_20": 0.02115403041243553,
|
|
"rewards/accgated_coverage_25": 0.02115403041243553,
|
|
"rewards/accgated_coverage_5": 0.02115403041243553,
|
|
"rewards/accuracy_reward": 0.46669921875,
|
|
"rewards/brier_reward": 0.7538125157356262,
|
|
"rewards/confidence_uniqueness_reward": 0.9529985308647155,
|
|
"rewards/format_reward": 0.99873046875,
|
|
"rewards/frontier_aurc_reward": -0.0034322818275541065,
|
|
"rewards/frontier_ece_reward": 0.01811833530664444,
|
|
"rewards/frontier_entropy_batch_reward": -0.20332725048065187,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05779874622821808,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.07419940680265427,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05779874622821808,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.07419940680265427,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05779874622821808,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.07419940680265427,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05779874622821808,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.07419940680265427,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.05779874622821808,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.07419940680265427,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05779874622821808,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07419940680265427,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05779874622821808,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.07419940680265427,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005779874604195356,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.135028076171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.17842960655689238,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0675140380859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0675140380859375,
|
|
"signal/advantage_abs_mean": 0.09368720501661301,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09368720501661301,
|
|
"signal/advantage_pre_scale_std": 0.13311106264591216,
|
|
"signal/advantage_std": 0.13311106264591216,
|
|
"signal/brier_reward/centered_abs_mean": 0.1977373868227005,
|
|
"signal/brier_reward/group_std_mean": 0.2476351499557495,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019773739948868753,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019773739948868753,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01538306288421154,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.023301176354289056,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015383063117042183,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015383063117042183,
|
|
"signal/format_reward/centered_abs_mean": 0.002459716796875,
|
|
"signal/format_reward/group_std_mean": 0.007181553076952696,
|
|
"signal/format_reward/group_zero_std_frac": 0.959375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026450737379491327,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003880691761150956,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3063420414691794e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3063420414691794e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0504297137260437,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06822670623660088,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050429713912308214,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050429713912308214,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2879321575164795,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36692259907722474,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028793216869235038,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028793216869235038,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.398296738207578,
|
|
"calibration/batch_distribution_entropy": 0.9787268880157762,
|
|
"calibration/buffer_distribution_entropy": 0.980509843864418,
|
|
"calibration/confidence_entropy": 0.4779615453676459,
|
|
"calibration/coverage@0%": 0.0050804182974559685,
|
|
"calibration/coverage@1%": 0.0050804182974559685,
|
|
"calibration/coverage@10%": 0.021517245596868885,
|
|
"calibration/coverage@15%": 0.1330594116927593,
|
|
"calibration/coverage@20%": 0.17375703277886498,
|
|
"calibration/coverage@25%": 0.22266542318982388,
|
|
"calibration/coverage@30%": 0.2637269141389432,
|
|
"calibration/coverage@5%": 0.0050804182974559685,
|
|
"calibration/ece": 0.19204764250205275,
|
|
"calibration/mean_confidence": 0.5236280753745388,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 715.8,
|
|
"completions/max_terminated_length": 715.8,
|
|
"completions/mean_length": 173.01669921875,
|
|
"completions/mean_terminated_length": 173.11717834472657,
|
|
"completions/min_length": 28.6,
|
|
"completions/min_terminated_length": 63.0,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.016353704035282135,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 248230479.0,
|
|
"reward": 0.9228451728820801,
|
|
"reward_std": 0.13029859066009522,
|
|
"rewards/accgated_coverage_0": 0.014048190228641034,
|
|
"rewards/accgated_coverage_1": 0.014048190228641034,
|
|
"rewards/accgated_coverage_10": 0.014048190228641034,
|
|
"rewards/accgated_coverage_15": 0.014048190228641034,
|
|
"rewards/accgated_coverage_20": 0.014048190228641034,
|
|
"rewards/accgated_coverage_25": 0.014048190228641034,
|
|
"rewards/accgated_coverage_5": 0.014048190228641034,
|
|
"rewards/accuracy_reward": 0.52412109375,
|
|
"rewards/brier_reward": 0.7445278406143189,
|
|
"rewards/confidence_uniqueness_reward": 0.9542921662330628,
|
|
"rewards/format_reward": 0.998828125,
|
|
"rewards/frontier_aurc_reward": -0.0032572926487773658,
|
|
"rewards/frontier_ece_reward": 0.018665025755763055,
|
|
"rewards/frontier_entropy_batch_reward": -0.2017095595598221,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.06603854522109032,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.08608146607875825,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.06603854522109032,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.08608146607875825,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.06603854522109032,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.08608146607875825,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.06603854522109032,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.08608146607875825,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.06603854522109032,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.08608146607875825,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.06603854522109032,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.08608146607875825,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.06603854522109032,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.08608146607875825,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0066038545221090315,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.153692626953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.20447224378585815,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.409375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0768463134765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0768463134765625,
|
|
"signal/advantage_abs_mean": 0.10250550508499146,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10250550508499146,
|
|
"signal/advantage_pre_scale_std": 0.14105989634990693,
|
|
"signal/advantage_std": 0.14105989634990693,
|
|
"signal/brier_reward/centered_abs_mean": 0.20125386118888855,
|
|
"signal/brier_reward/group_std_mean": 0.250895568728447,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020125385373830795,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020125385373830795,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014721071161329747,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.022287074476480484,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014721071347594261,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014721071347594261,
|
|
"signal/format_reward/centered_abs_mean": 0.0022705078125,
|
|
"signal/format_reward/group_std_mean": 0.006629125913605094,
|
|
"signal/format_reward/group_zero_std_frac": 0.9625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00113525390625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028927187900990247,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0041770459152758125,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.615898676798679e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.615898676798679e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05018571987748146,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06835311651229858,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005018572043627501,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005018572043627501,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28730441331863404,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3638529360294342,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028730442002415656,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028730442002415656,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33968669272836216,
|
|
"calibration/batch_distribution_entropy": 0.9772573735760852,
|
|
"calibration/buffer_distribution_entropy": 0.9819020986090996,
|
|
"calibration/confidence_entropy": 0.4644303426340458,
|
|
"calibration/coverage@0%": 0.003520220588235294,
|
|
"calibration/coverage@1%": 0.003520220588235294,
|
|
"calibration/coverage@10%": 0.0948468137254902,
|
|
"calibration/coverage@15%": 0.18452114150454704,
|
|
"calibration/coverage@20%": 0.28779996630501514,
|
|
"calibration/coverage@25%": 0.38054726770845326,
|
|
"calibration/coverage@30%": 0.5279652156239207,
|
|
"calibration/coverage@5%": 0.050554534313725495,
|
|
"calibration/ece": 0.12862788260104924,
|
|
"calibration/mean_confidence": 0.5174480799208958,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 791.6,
|
|
"completions/max_terminated_length": 791.6,
|
|
"completions/mean_length": 172.6880859375,
|
|
"completions/mean_terminated_length": 172.79131469726562,
|
|
"completions/min_length": 37.6,
|
|
"completions/min_terminated_length": 66.6,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.008825325407087803,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 265053621.0,
|
|
"reward": 0.9096681714057923,
|
|
"reward_std": 0.12087092399597169,
|
|
"rewards/accgated_coverage_0": 0.019006002135574816,
|
|
"rewards/accgated_coverage_1": 0.019006002135574816,
|
|
"rewards/accgated_coverage_10": 0.019006002135574816,
|
|
"rewards/accgated_coverage_15": 0.019006002135574816,
|
|
"rewards/accgated_coverage_20": 0.019006002135574816,
|
|
"rewards/accgated_coverage_25": 0.019006002135574816,
|
|
"rewards/accgated_coverage_5": 0.019006002135574816,
|
|
"rewards/accuracy_reward": 0.490234375,
|
|
"rewards/brier_reward": 0.7554831981658936,
|
|
"rewards/confidence_uniqueness_reward": 0.9535634636878967,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.00338795306161046,
|
|
"rewards/frontier_ece_reward": 0.018822862207889555,
|
|
"rewards/frontier_entropy_batch_reward": -0.21009528636932373,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05554577559232712,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.07183501571416855,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05554577559232712,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.07183501571416855,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05554577559232712,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.07183501571416855,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05554577559232712,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.07183501571416855,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.05554577559232712,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.07183501571416855,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05554577559232712,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07183501571416855,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05554577559232712,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.07183501571416855,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005554577801376581,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14427490234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.18380638659000398,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.072137451171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.072137451171875,
|
|
"signal/advantage_abs_mean": 0.0953369528055191,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0953369528055191,
|
|
"signal/advantage_pre_scale_std": 0.13637956976890564,
|
|
"signal/advantage_std": 0.13637956976890564,
|
|
"signal/brier_reward/centered_abs_mean": 0.1861722558736801,
|
|
"signal/brier_reward/group_std_mean": 0.23275550901889802,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018617226183414458,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018617226183414458,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01430168692022562,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019617033004760743,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014301687711849808,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014301687711849808,
|
|
"signal/format_reward/centered_abs_mean": 0.00177001953125,
|
|
"signal/format_reward/group_std_mean": 0.003914954606443644,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000885009765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000885009765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028289766516536472,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004063015244901181,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.536220756359398e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.536220756359398e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04593289494514465,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.063157469779253,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004593289457261562,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004593289457261562,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2936802178621292,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3710300087928772,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029368022084236146,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029368022084236146,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4089425308725524,
|
|
"calibration/batch_distribution_entropy": 0.9837127563451269,
|
|
"calibration/buffer_distribution_entropy": 0.9834328797869645,
|
|
"calibration/confidence_entropy": 0.4876595541216691,
|
|
"calibration/coverage@0%": 0.007831631796554238,
|
|
"calibration/coverage@1%": 0.007831631796554238,
|
|
"calibration/coverage@10%": 0.00939718952649553,
|
|
"calibration/coverage@15%": 0.01213845041441234,
|
|
"calibration/coverage@20%": 0.06687872323586969,
|
|
"calibration/coverage@25%": 0.2221628343118069,
|
|
"calibration/coverage@30%": 0.3183875091132343,
|
|
"calibration/coverage@5%": 0.007831631796554238,
|
|
"calibration/ece": 0.1605411700914821,
|
|
"calibration/mean_confidence": 0.5151936674590009,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 509.0,
|
|
"completions/max_terminated_length": 509.0,
|
|
"completions/mean_length": 179.36767578125,
|
|
"completions/mean_terminated_length": 179.49051818847656,
|
|
"completions/min_length": 13.6,
|
|
"completions/min_terminated_length": 72.6,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.006936948746442795,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0014,
|
|
"num_tokens": 281856042.0,
|
|
"reward": 0.9139393091201782,
|
|
"reward_std": 0.1179785043001175,
|
|
"rewards/accgated_coverage_0": 0.021143794246017932,
|
|
"rewards/accgated_coverage_1": 0.021143794246017932,
|
|
"rewards/accgated_coverage_10": 0.021143794246017932,
|
|
"rewards/accgated_coverage_15": 0.021143794246017932,
|
|
"rewards/accgated_coverage_20": 0.021143794246017932,
|
|
"rewards/accgated_coverage_25": 0.021143794246017932,
|
|
"rewards/accgated_coverage_5": 0.021143794246017932,
|
|
"rewards/accuracy_reward": 0.4890625,
|
|
"rewards/brier_reward": 0.7583757877349854,
|
|
"rewards/confidence_uniqueness_reward": 0.9539362549781799,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0031724351458251475,
|
|
"rewards/frontier_ece_reward": 0.016499579697847367,
|
|
"rewards/frontier_entropy_batch_reward": -0.17843463122844697,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.0536054477095604,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06919381469488144,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.0536054477095604,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06919381469488144,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.0536054477095604,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06919381469488144,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0536054477095604,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06919381469488144,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.0536054477095604,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06919381469488144,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.0536054477095604,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06919381469488144,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.0536054477095604,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06919381469488144,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0053605446591973305,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1404296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.17963421940803528,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07021484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07021484375,
|
|
"signal/advantage_abs_mean": 0.09316650480031967,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09316650480031967,
|
|
"signal/advantage_pre_scale_std": 0.13246660977602004,
|
|
"signal/advantage_std": 0.13246660977602004,
|
|
"signal/brier_reward/centered_abs_mean": 0.17635450959205629,
|
|
"signal/brier_reward/group_std_mean": 0.22242403626441956,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01763545088469982,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01763545088469982,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013126727193593979,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01895910929888487,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013126727426424623,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013126727426424623,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024571210611611604,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035782069433480503,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0714013701071965e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0714013701071965e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03977171406149864,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.055351509153842925,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0039771712385118,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0039771712385118,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2645086497068405,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34457975029945376,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026450866460800172,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026450866460800172,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3687604260891969,
|
|
"calibration/batch_distribution_entropy": 0.9891861733002207,
|
|
"calibration/buffer_distribution_entropy": 0.9852262916544937,
|
|
"calibration/confidence_entropy": 0.49639468802077724,
|
|
"calibration/coverage@0%": 0.008625955285295214,
|
|
"calibration/coverage@1%": 0.008625955285295214,
|
|
"calibration/coverage@10%": 0.06206406923421467,
|
|
"calibration/coverage@15%": 0.08563970774109089,
|
|
"calibration/coverage@20%": 0.10606885202845842,
|
|
"calibration/coverage@25%": 0.1616404324651383,
|
|
"calibration/coverage@30%": 0.2792983287733709,
|
|
"calibration/coverage@5%": 0.037309648801994624,
|
|
"calibration/ece": 0.11583087225513293,
|
|
"calibration/mean_confidence": 0.5170620981196054,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 737.4,
|
|
"completions/max_terminated_length": 737.4,
|
|
"completions/mean_length": 179.5173828125,
|
|
"completions/mean_terminated_length": 179.58653564453124,
|
|
"completions/min_length": 29.6,
|
|
"completions/min_terminated_length": 73.2,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0012688508722931147,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 298652476.0,
|
|
"reward": 0.9159511685371399,
|
|
"reward_std": 0.11746599227190017,
|
|
"rewards/accgated_coverage_0": 0.02089243084192276,
|
|
"rewards/accgated_coverage_1": 0.02089243084192276,
|
|
"rewards/accgated_coverage_10": 0.02089243084192276,
|
|
"rewards/accgated_coverage_15": 0.02089243084192276,
|
|
"rewards/accgated_coverage_20": 0.02089243084192276,
|
|
"rewards/accgated_coverage_25": 0.02089243084192276,
|
|
"rewards/accgated_coverage_5": 0.02089243084192276,
|
|
"rewards/accuracy_reward": 0.49248046875,
|
|
"rewards/brier_reward": 0.7615193486213684,
|
|
"rewards/confidence_uniqueness_reward": 0.9535805463790894,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.003121078945696354,
|
|
"rewards/frontier_ece_reward": 0.01654744055122137,
|
|
"rewards/frontier_entropy_batch_reward": -0.1755122125148773,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.055193740874528885,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.0715998388826847,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.055193740874528885,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.0715998388826847,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.055193740874528885,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.0715998388826847,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.055193740874528885,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.0715998388826847,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.055193740874528885,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.0715998388826847,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.055193740874528885,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.0715998388826847,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.055193740874528885,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.0715998388826847,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005519374087452888,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.144854736328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.19162459969520568,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0724273681640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0724273681640625,
|
|
"signal/advantage_abs_mean": 0.09115136116743087,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09115136116743087,
|
|
"signal/advantage_pre_scale_std": 0.13110833764076232,
|
|
"signal/advantage_std": 0.13110833764076232,
|
|
"signal/brier_reward/centered_abs_mean": 0.1735696941614151,
|
|
"signal/brier_reward/group_std_mean": 0.22003813683986664,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01735696941614151,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01735696941614151,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013291171565651894,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01954982727766037,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001329117128625512,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001329117128625512,
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0055242716800421475,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002382648875936866,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035583035554736854,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9783111676806585e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9783111676806585e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03827905729413032,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05356697663664818,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003827905748039484,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003827905748039484,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2567889988422394,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3347454309463501,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025678900256752967,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025678900256752967,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3405218322089727,
|
|
"calibration/batch_distribution_entropy": 0.9821722305232516,
|
|
"calibration/buffer_distribution_entropy": 0.9865708850066515,
|
|
"calibration/confidence_entropy": 0.48427231850411545,
|
|
"calibration/coverage@0%": 0.0015625,
|
|
"calibration/coverage@1%": 0.0015625,
|
|
"calibration/coverage@10%": 0.015649509803921567,
|
|
"calibration/coverage@15%": 0.06487591911764705,
|
|
"calibration/coverage@20%": 0.19340226715686276,
|
|
"calibration/coverage@25%": 0.32831316130904414,
|
|
"calibration/coverage@30%": 0.42801504163309156,
|
|
"calibration/coverage@5%": 0.0015625,
|
|
"calibration/ece": 0.12548018093425042,
|
|
"calibration/mean_confidence": 0.5260518163695782,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 602.8,
|
|
"completions/max_terminated_length": 602.8,
|
|
"completions/mean_length": 183.88154296875,
|
|
"completions/mean_terminated_length": 183.9718444824219,
|
|
"completions/min_length": 17.4,
|
|
"completions/min_terminated_length": 78.4,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0011141430586576462,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 315465375.0,
|
|
"reward": 0.9172628998756409,
|
|
"reward_std": 0.11238628774881362,
|
|
"rewards/accgated_coverage_0": 0.021862666122615337,
|
|
"rewards/accgated_coverage_1": 0.021862666122615337,
|
|
"rewards/accgated_coverage_10": 0.021862666122615337,
|
|
"rewards/accgated_coverage_15": 0.021862666122615337,
|
|
"rewards/accgated_coverage_20": 0.021862666122615337,
|
|
"rewards/accgated_coverage_25": 0.021862666122615337,
|
|
"rewards/accgated_coverage_5": 0.021862666122615337,
|
|
"rewards/accuracy_reward": 0.49482421875,
|
|
"rewards/brier_reward": 0.751349675655365,
|
|
"rewards/confidence_uniqueness_reward": 0.9536804795265198,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.0032011067494750025,
|
|
"rewards/frontier_ece_reward": 0.013386439438909293,
|
|
"rewards/frontier_entropy_batch_reward": -0.16815277338027954,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05449363440275192,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06998619660735131,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05449363440275192,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06998619660735131,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05449363440275192,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06998619660735131,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05449363440275192,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06998619660735131,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.05449363440275192,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06998619660735131,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05449363440275192,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06998619660735131,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05449363440275192,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06998619660735131,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0054493632167577745,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.133612060546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.17452629208564757,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.503125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0668060302734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0668060302734375,
|
|
"signal/advantage_abs_mean": 0.08762804120779037,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08762804120779037,
|
|
"signal/advantage_pre_scale_std": 0.12492723762989044,
|
|
"signal/advantage_std": 0.12492723762989044,
|
|
"signal/brier_reward/centered_abs_mean": 0.17246018946170807,
|
|
"signal/brier_reward/group_std_mean": 0.2173982620239258,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01724601909518242,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01724601909518242,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013059911504387855,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019081205874681473,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013059912016615272,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013059912016615272,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_std_mean": 0.004971844423562288,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024050343316048385,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003545998828485608,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0062928271945565e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0062928271945565e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03649954423308373,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05162616893649101,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036499544978141783,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036499544978141783,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24413655698299408,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3214601457118988,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024413655698299407,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024413655698299407,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2793672778316487,
|
|
"calibration/batch_distribution_entropy": 0.9858310757451532,
|
|
"calibration/buffer_distribution_entropy": 0.9873953350221905,
|
|
"calibration/confidence_entropy": 0.47684266949468457,
|
|
"calibration/coverage@0%": 0.02150807240704501,
|
|
"calibration/coverage@1%": 0.02150807240704501,
|
|
"calibration/coverage@10%": 0.13804504036203522,
|
|
"calibration/coverage@15%": 0.2718092588062622,
|
|
"calibration/coverage@20%": 0.3977823813600783,
|
|
"calibration/coverage@25%": 0.4634929977984344,
|
|
"calibration/coverage@30%": 0.5448423740215265,
|
|
"calibration/coverage@5%": 0.043004678326810174,
|
|
"calibration/ece": 0.14391048489074354,
|
|
"calibration/mean_confidence": 0.5321919492656221,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0009765625,
|
|
"completions/max_length": 844.6,
|
|
"completions/max_terminated_length": 844.6,
|
|
"completions/mean_length": 182.83623046875,
|
|
"completions/mean_terminated_length": 183.01766967773438,
|
|
"completions/min_length": 15.0,
|
|
"completions/min_terminated_length": 83.4,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0008273598505184054,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 332426322.0,
|
|
"reward": 0.9257388591766358,
|
|
"reward_std": 0.10454508364200592,
|
|
"rewards/accgated_coverage_0": 0.01682482985779643,
|
|
"rewards/accgated_coverage_1": 0.01682482985779643,
|
|
"rewards/accgated_coverage_10": 0.01682482985779643,
|
|
"rewards/accgated_coverage_15": 0.01682482985779643,
|
|
"rewards/accgated_coverage_20": 0.01682482985779643,
|
|
"rewards/accgated_coverage_25": 0.01682482985779643,
|
|
"rewards/accgated_coverage_5": 0.01682482985779643,
|
|
"rewards/accuracy_reward": 0.516015625,
|
|
"rewards/brier_reward": 0.7683913350105286,
|
|
"rewards/confidence_uniqueness_reward": 0.9535138487815857,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.0029093018732964993,
|
|
"rewards/frontier_ece_reward": 0.017980808019638063,
|
|
"rewards/frontier_entropy_batch_reward": -0.17510271370410918,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.055274638906121255,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.07174940705299378,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.055274638906121255,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.07174940705299378,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.055274638906121255,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.07174940705299378,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.055274638906121255,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.07174940705299378,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.055274638906121255,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.07174940705299378,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.055274638906121255,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07174940705299378,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.055274638906121255,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.07174940705299378,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005527463788166642,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1016845703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14366158843040466,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05084228515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05084228515625,
|
|
"signal/advantage_abs_mean": 0.07974396646022797,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07974396646022797,
|
|
"signal/advantage_pre_scale_std": 0.11739609837532043,
|
|
"signal/advantage_std": 0.11739609837532043,
|
|
"signal/brier_reward/centered_abs_mean": 0.16091605126857758,
|
|
"signal/brier_reward/group_std_mean": 0.20566837787628173,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016091605462133885,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016091605462133885,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013066734373569488,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018530824780464174,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001306673465296626,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001306673465296626,
|
|
"signal/format_reward/centered_abs_mean": 0.00184326171875,
|
|
"signal/format_reward/group_std_mean": 0.004456133488565684,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000921630859375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000921630859375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002345598582178354,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034848656971007584,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9319982422748582e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9319982422748582e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03649588227272034,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05092453882098198,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036495882552117108,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036495882552117108,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2617928504943848,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34030061960220337,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02617928609251976,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02617928609251976,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.4755305499734114,
|
|
"eval_calibration/batch_distribution_entropy": 0.946233855720068,
|
|
"eval_calibration/buffer_distribution_entropy": 0.987774627262672,
|
|
"eval_calibration/confidence_entropy": 0.49974650757893324,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.046875,
|
|
"eval_calibration/coverage@15%": 0.046875,
|
|
"eval_calibration/coverage@20%": 0.0703125,
|
|
"eval_calibration/coverage@25%": 0.0703125,
|
|
"eval_calibration/coverage@30%": 0.0703125,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.21127313848542387,
|
|
"eval_calibration/mean_confidence": 0.47564682886743537,
|
|
"eval_completions/clipped_ratio": 0.001953125,
|
|
"eval_completions/max_length": 365.25,
|
|
"eval_completions/max_terminated_length": 365.25,
|
|
"eval_completions/mean_length": 184.06418228149414,
|
|
"eval_completions/mean_terminated_length": 184.4076385498047,
|
|
"eval_completions/min_length": 79.25,
|
|
"eval_completions/min_terminated_length": 98.25,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 332426322.0,
|
|
"eval_reward": 0.7920490056276321,
|
|
"eval_reward_std": 0.22250742837786674,
|
|
"eval_rewards/accgated_coverage_0": 0.028197373263537884,
|
|
"eval_rewards/accgated_coverage_1": 0.028197373263537884,
|
|
"eval_rewards/accgated_coverage_10": 0.028197373263537884,
|
|
"eval_rewards/accgated_coverage_15": 0.028197373263537884,
|
|
"eval_rewards/accgated_coverage_20": 0.028197373263537884,
|
|
"eval_rewards/accgated_coverage_25": 0.028197373263537884,
|
|
"eval_rewards/accgated_coverage_5": 0.028197373263537884,
|
|
"eval_rewards/accuracy_reward": 0.41015625,
|
|
"eval_rewards/brier_reward": 0.7799021005630493,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8919402062892914,
|
|
"eval_rewards/format_reward": 0.99609375,
|
|
"eval_rewards/frontier_aurc_reward": -0.0031025345670059323,
|
|
"eval_rewards/frontier_ece_reward": 0.016497689532116055,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.99609375,
|
|
"eval_runtime": 29.4624,
|
|
"eval_samples_per_second": 16.971,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.06307912431657314,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.07854281552135944,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.06307912431657314,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.07854281552135944,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.06307912431657314,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.07854281552135944,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.06307912431657314,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.07854281552135944,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.06307912431657314,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.07854281552135944,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.06307912431657314,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.07854281552135944,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.06307912431657314,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.07854281552135944,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006307912524789572,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.471435546875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4929209053516388,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2357177734375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2357177734375,
|
|
"eval_signal/advantage_abs_mean": 0.19946623593568802,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19946623593568802,
|
|
"eval_signal/advantage_pre_scale_std": 0.22014939039945602,
|
|
"eval_signal/advantage_std": 0.22014939039945602,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20339667797088623,
|
|
"eval_signal/brier_reward/group_std_mean": 0.254949651658535,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020339668728411198,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020339668728411198,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045136974193155766,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06305716466158628,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0045136973494663835,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045136973494663835,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.007568359375,
|
|
"eval_signal/format_reward/group_std_mean": 0.022097086533904076,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.875,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003286067338194698,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004906978341750801,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1075841181736905e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1075841181736905e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.036860852502286434,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.05696882400661707,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036860854597762227,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036860854597762227,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.007568359375,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022097086533904076,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.875,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007568359724245965,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0007568359724245965,
|
|
"eval_steps_per_second": 0.136,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3150930502080606,
|
|
"calibration/batch_distribution_entropy": 0.9801984768190843,
|
|
"calibration/buffer_distribution_entropy": 0.9893199889362148,
|
|
"calibration/confidence_entropy": 0.49293979634305457,
|
|
"calibration/coverage@0%": 0.00352097602739726,
|
|
"calibration/coverage@1%": 0.00352097602739726,
|
|
"calibration/coverage@10%": 0.03244786570450098,
|
|
"calibration/coverage@15%": 0.10476929427592956,
|
|
"calibration/coverage@20%": 0.1626108427103718,
|
|
"calibration/coverage@25%": 0.3264279598825831,
|
|
"calibration/coverage@30%": 0.5246605919765167,
|
|
"calibration/coverage@5%": 0.009783206947162426,
|
|
"calibration/ece": 0.12490733087500346,
|
|
"calibration/mean_confidence": 0.4861851745112829,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 672.0,
|
|
"completions/max_terminated_length": 672.0,
|
|
"completions/mean_length": 184.3849609375,
|
|
"completions/mean_terminated_length": 184.49216003417968,
|
|
"completions/min_length": 35.2,
|
|
"completions/min_terminated_length": 85.6,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.024251488968729973,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 349036856.0,
|
|
"reward": 0.9335217475891113,
|
|
"reward_std": 0.10637302547693253,
|
|
"rewards/accgated_coverage_0": 0.016940949019044638,
|
|
"rewards/accgated_coverage_1": 0.016940949019044638,
|
|
"rewards/accgated_coverage_10": 0.016940949019044638,
|
|
"rewards/accgated_coverage_15": 0.016940949019044638,
|
|
"rewards/accgated_coverage_20": 0.016940949019044638,
|
|
"rewards/accgated_coverage_25": 0.016940949019044638,
|
|
"rewards/accgated_coverage_5": 0.016940949019044638,
|
|
"rewards/accuracy_reward": 0.53251953125,
|
|
"rewards/brier_reward": 0.7731749534606933,
|
|
"rewards/confidence_uniqueness_reward": 0.9532255172729492,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.002607670472934842,
|
|
"rewards/frontier_ece_reward": 0.01801227778196335,
|
|
"rewards/frontier_entropy_batch_reward": -0.18663570284843445,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05794127359986305,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.07467034608125686,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05794127359986305,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.07467034608125686,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05794127359986305,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.07467034608125686,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05794127359986305,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.07467034608125686,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.05794127359986305,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.07467034608125686,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05794127359986305,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07467034608125686,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05794127359986305,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.07467034608125686,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005794127332046628,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.121795654296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1620877206325531,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0608978271484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0608978271484375,
|
|
"signal/advantage_abs_mean": 0.08342937678098679,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08342937678098679,
|
|
"signal/advantage_pre_scale_std": 0.11973823308944702,
|
|
"signal/advantage_std": 0.11973823308944702,
|
|
"signal/brier_reward/centered_abs_mean": 0.15785402953624725,
|
|
"signal/brier_reward/group_std_mean": 0.2000586748123169,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015785403177142145,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015785403177142145,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01279226690530777,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017774837836623193,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012792267836630345,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012792267836630345,
|
|
"signal/format_reward/centered_abs_mean": 0.001312255859375,
|
|
"signal/format_reward/group_std_mean": 0.0035306816454976795,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021891113370656966,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003217978123575449,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7363891786080784e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7363891786080784e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0332314558327198,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.046575964987277986,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033231456764042377,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033231456764042377,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2611107021570206,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3386889636516571,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026111070811748505,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026111070811748505,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3305907231466021,
|
|
"calibration/batch_distribution_entropy": 0.9634706351613571,
|
|
"calibration/buffer_distribution_entropy": 0.9940211391579403,
|
|
"calibration/confidence_entropy": 0.454238899033483,
|
|
"calibration/coverage@0%": 0.010160072162426615,
|
|
"calibration/coverage@1%": 0.010160072162426615,
|
|
"calibration/coverage@10%": 0.12696076932485323,
|
|
"calibration/coverage@15%": 0.25279552959882584,
|
|
"calibration/coverage@20%": 0.3411264677103718,
|
|
"calibration/coverage@25%": 0.4102907901174168,
|
|
"calibration/coverage@30%": 0.4845355308219178,
|
|
"calibration/coverage@5%": 0.042972572162426614,
|
|
"calibration/ece": 0.11313356682716007,
|
|
"calibration/mean_confidence": 0.4546359315943291,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 451.2,
|
|
"completions/max_terminated_length": 451.2,
|
|
"completions/mean_length": 185.22998046875,
|
|
"completions/mean_terminated_length": 185.33853149414062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 88.6,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.0009761779219843447,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 366194027.0,
|
|
"reward": 0.9029169917106629,
|
|
"reward_std": 0.1013871669769287,
|
|
"rewards/accgated_coverage_0": 0.02567037269473076,
|
|
"rewards/accgated_coverage_1": 0.02567037269473076,
|
|
"rewards/accgated_coverage_10": 0.02567037269473076,
|
|
"rewards/accgated_coverage_15": 0.02567037269473076,
|
|
"rewards/accgated_coverage_20": 0.02567037269473076,
|
|
"rewards/accgated_coverage_25": 0.02567037269473076,
|
|
"rewards/accgated_coverage_5": 0.02567037269473076,
|
|
"rewards/accuracy_reward": 0.460546875,
|
|
"rewards/brier_reward": 0.773802924156189,
|
|
"rewards/confidence_uniqueness_reward": 0.9526262640953064,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.003255124855786562,
|
|
"rewards/frontier_ece_reward": 0.014954091794788838,
|
|
"rewards/frontier_entropy_batch_reward": -0.1913035809993744,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04605281800031662,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.0581918366253376,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04605281800031662,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.0581918366253376,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04605281800031662,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.0581918366253376,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04605281800031662,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.0581918366253376,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04605281800031662,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.0581918366253376,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04605281800031662,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.0581918366253376,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04605281800031662,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.0581918366253376,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0046052816323935986,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1187255859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1538717418909073,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05936279296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05936279296875,
|
|
"signal/advantage_abs_mean": 0.07860026806592942,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07860026806592942,
|
|
"signal/advantage_pre_scale_std": 0.1170931875705719,
|
|
"signal/advantage_std": 0.1170931875705719,
|
|
"signal/brier_reward/centered_abs_mean": 0.1583779364824295,
|
|
"signal/brier_reward/group_std_mean": 0.2012830913066864,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015837793983519078,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015837793983519078,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013373796828091145,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018645089119672775,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013373797060921787,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013373797060921787,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002646684320643544,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0039520672988146545,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.308355480839964e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.308355480839964e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030535892769694328,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04203937202692032,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003053589351475239,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003053589351475239,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.264906769990921,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3425568282604218,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026490678265690803,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026490678265690803,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.39777015618484624,
|
|
"calibration/batch_distribution_entropy": 0.9746749712088182,
|
|
"calibration/buffer_distribution_entropy": 0.9970537948902933,
|
|
"calibration/confidence_entropy": 0.4622045118054922,
|
|
"calibration/coverage@0%": 0.0035163894324853227,
|
|
"calibration/coverage@1%": 0.0035163894324853227,
|
|
"calibration/coverage@10%": 0.016407014432485323,
|
|
"calibration/coverage@15%": 0.028125764432485323,
|
|
"calibration/coverage@20%": 0.22500076443248532,
|
|
"calibration/coverage@25%": 0.3039093077299413,
|
|
"calibration/coverage@30%": 0.3621124327299413,
|
|
"calibration/coverage@5%": 0.0035163894324853227,
|
|
"calibration/ece": 0.14096880096182346,
|
|
"calibration/mean_confidence": 0.5173561576705936,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00107421875,
|
|
"completions/max_length": 560.2,
|
|
"completions/max_terminated_length": 560.2,
|
|
"completions/mean_length": 184.6703125,
|
|
"completions/mean_terminated_length": 184.86813659667968,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 88.4,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0008754940354265273,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 383150523.0,
|
|
"reward": 0.9097236037254334,
|
|
"reward_std": 0.10353504717350007,
|
|
"rewards/accgated_coverage_0": 0.023810245096683502,
|
|
"rewards/accgated_coverage_1": 0.023810245096683502,
|
|
"rewards/accgated_coverage_10": 0.023810245096683502,
|
|
"rewards/accgated_coverage_15": 0.023810245096683502,
|
|
"rewards/accgated_coverage_20": 0.023810245096683502,
|
|
"rewards/accgated_coverage_25": 0.023810245096683502,
|
|
"rewards/accgated_coverage_5": 0.023810245096683502,
|
|
"rewards/accuracy_reward": 0.4810546875,
|
|
"rewards/brier_reward": 0.7758293151855469,
|
|
"rewards/confidence_uniqueness_reward": 0.9516843318939209,
|
|
"rewards/format_reward": 0.99873046875,
|
|
"rewards/frontier_aurc_reward": -0.0034904766362160444,
|
|
"rewards/frontier_ece_reward": 0.014134907722473144,
|
|
"rewards/frontier_entropy_batch_reward": -0.2095736712217331,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04509783834218979,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.058495976775884626,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04509783834218979,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.058495976775884626,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04509783834218979,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.058495976775884626,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04509783834218979,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.058495976775884626,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04509783834218979,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.058495976775884626,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04509783834218979,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.058495976775884626,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04509783834218979,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.058495976775884626,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004509783769026399,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11456298828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1523550420999527,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.057281494140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.057281494140625,
|
|
"signal/advantage_abs_mean": 0.07994274199008941,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07994274199008941,
|
|
"signal/advantage_pre_scale_std": 0.12059660255908966,
|
|
"signal/advantage_std": 0.12059660255908966,
|
|
"signal/brier_reward/centered_abs_mean": 0.15113866925239564,
|
|
"signal/brier_reward/group_std_mean": 0.19373134672641754,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01511386651545763,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01511386651545763,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014305031299591065,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.020990825816988946,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014305031159892677,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014305031159892677,
|
|
"signal/format_reward/centered_abs_mean": 0.002423095703125,
|
|
"signal/format_reward/group_std_mean": 0.00617262776941061,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012115478515625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012115478515625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003047790750861168,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00454138470813632,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.809738409472629e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.809738409472629e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.027387873083353043,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0378493033349514,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027387873269617558,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027387873269617558,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28054032325744627,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3566042065620422,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028054032102227212,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028054032102227212,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3219032277188316,
|
|
"calibration/batch_distribution_entropy": 0.9666301345587124,
|
|
"calibration/buffer_distribution_entropy": 0.9982393081790967,
|
|
"calibration/confidence_entropy": 0.4560697596889498,
|
|
"calibration/coverage@0%": 0.030471825787401575,
|
|
"calibration/coverage@1%": 0.030471825787401575,
|
|
"calibration/coverage@10%": 0.20312807578740158,
|
|
"calibration/coverage@15%": 0.2687530757874016,
|
|
"calibration/coverage@20%": 0.3023468257874016,
|
|
"calibration/coverage@25%": 0.34924643208661416,
|
|
"calibration/coverage@30%": 0.39534325787401575,
|
|
"calibration/coverage@5%": 0.07969057578740157,
|
|
"calibration/ece": 0.13579660074192376,
|
|
"calibration/mean_confidence": 0.4620772287971846,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00126953125,
|
|
"completions/max_length": 916.6,
|
|
"completions/max_terminated_length": 916.6,
|
|
"completions/mean_length": 186.89169921875,
|
|
"completions/mean_terminated_length": 187.1309844970703,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 83.2,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0008244336349889636,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 399920806.0,
|
|
"reward": 0.9279178500175476,
|
|
"reward_std": 0.0995772048830986,
|
|
"rewards/accgated_coverage_0": 0.022253712080419062,
|
|
"rewards/accgated_coverage_1": 0.022253712080419062,
|
|
"rewards/accgated_coverage_10": 0.022253712080419062,
|
|
"rewards/accgated_coverage_15": 0.022253712080419062,
|
|
"rewards/accgated_coverage_20": 0.022253712080419062,
|
|
"rewards/accgated_coverage_25": 0.022253712080419062,
|
|
"rewards/accgated_coverage_5": 0.022253712080419062,
|
|
"rewards/accuracy_reward": 0.51845703125,
|
|
"rewards/brier_reward": 0.786729919910431,
|
|
"rewards/confidence_uniqueness_reward": 0.9509214878082275,
|
|
"rewards/format_reward": 0.9986328125,
|
|
"rewards/frontier_aurc_reward": -0.00305885705165565,
|
|
"rewards/frontier_ece_reward": 0.01511908657848835,
|
|
"rewards/frontier_entropy_batch_reward": -0.214434677362442,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05044243782758713,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06544317230582238,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05044243782758713,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06544317230582238,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05044243782758713,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06544317230582238,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05044243782758713,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06544317230582238,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.05044243782758713,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06544317230582238,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05044243782758713,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06544317230582238,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05044243782758713,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06544317230582238,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005044243857264518,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.111529541015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1498907119035721,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0557647705078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0557647705078125,
|
|
"signal/advantage_abs_mean": 0.07648073881864548,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07648073881864548,
|
|
"signal/advantage_pre_scale_std": 0.114204902946949,
|
|
"signal/advantage_std": 0.114204902946949,
|
|
"signal/brier_reward/centered_abs_mean": 0.13833243846893312,
|
|
"signal/brier_reward/group_std_mean": 0.17853497862815856,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01383324433118105,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01383324433118105,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014909646660089492,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.020950117707252504,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014909646706655622,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014909646706655622,
|
|
"signal/format_reward/centered_abs_mean": 0.0025390625,
|
|
"signal/format_reward/group_std_mean": 0.005538491113111377,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00126953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00126953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030949720181524753,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0047924695536494255,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8687149208271875e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8687149208271875e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023026642948389055,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.031030115485191346,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002302664425224066,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002302664425224066,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2783478438854218,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3542932987213135,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027834784239530563,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027834784239530563,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4324407494110575,
|
|
"calibration/batch_distribution_entropy": 0.986381165469108,
|
|
"calibration/buffer_distribution_entropy": 0.9980247081930601,
|
|
"calibration/confidence_entropy": 0.49569577927429453,
|
|
"calibration/coverage@0%": 0.003125764432485323,
|
|
"calibration/coverage@1%": 0.003125764432485323,
|
|
"calibration/coverage@10%": 0.003125764432485323,
|
|
"calibration/coverage@15%": 0.011328889432485324,
|
|
"calibration/coverage@20%": 0.02265701443248532,
|
|
"calibration/coverage@25%": 0.05979849559686888,
|
|
"calibration/coverage@30%": 0.18609955968688846,
|
|
"calibration/coverage@5%": 0.003125764432485323,
|
|
"calibration/ece": 0.1540248613869452,
|
|
"calibration/mean_confidence": 0.5007551271020173,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 499.8,
|
|
"completions/max_terminated_length": 499.8,
|
|
"completions/mean_length": 186.47431640625,
|
|
"completions/mean_terminated_length": 186.58330688476562,
|
|
"completions/min_length": 16.6,
|
|
"completions/min_terminated_length": 92.4,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.0009725225972943008,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 416866751.0,
|
|
"reward": 0.9153225064277649,
|
|
"reward_std": 0.10575702488422394,
|
|
"rewards/accgated_coverage_0": 0.02327599683776498,
|
|
"rewards/accgated_coverage_1": 0.02327599683776498,
|
|
"rewards/accgated_coverage_10": 0.02327599683776498,
|
|
"rewards/accgated_coverage_15": 0.02327599683776498,
|
|
"rewards/accgated_coverage_20": 0.02327599683776498,
|
|
"rewards/accgated_coverage_25": 0.02327599683776498,
|
|
"rewards/accgated_coverage_5": 0.02327599683776498,
|
|
"rewards/accuracy_reward": 0.4916015625,
|
|
"rewards/brier_reward": 0.7742549061775208,
|
|
"rewards/confidence_uniqueness_reward": 0.9524218559265136,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0036125687882304193,
|
|
"rewards/frontier_ece_reward": 0.011607270315289497,
|
|
"rewards/frontier_entropy_batch_reward": -0.20212911069393158,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.046710155159235,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.060144589841365816,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.046710155159235,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.060144589841365816,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.046710155159235,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.060144589841365816,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.046710155159235,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.060144589841365816,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.046710155159235,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.060144589841365816,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.046710155159235,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.060144589841365816,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.046710155159235,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.060144589841365816,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004671015590429306,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1314697265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.16968526542186738,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.528125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06573486328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06573486328125,
|
|
"signal/advantage_abs_mean": 0.08297596722841263,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08297596722841263,
|
|
"signal/advantage_pre_scale_std": 0.12231777310371399,
|
|
"signal/advantage_std": 0.12231777310371399,
|
|
"signal/brier_reward/centered_abs_mean": 0.14882287085056306,
|
|
"signal/brier_reward/group_std_mean": 0.19086708426475524,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014882288128137588,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014882288128137588,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012974631786346436,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018405388668179513,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001297463197261095,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001297463197261095,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.003866990143433213,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003667995473369956,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005854966584593058,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.584994530887343e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.584994530887343e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02002083547413349,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.026638072356581687,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020020836032927035,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020020836032927035,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27425014078617094,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3491857171058655,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027425015717744826,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027425015717744826,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33193371781089065,
|
|
"calibration/batch_distribution_entropy": 0.978694331339585,
|
|
"calibration/buffer_distribution_entropy": 0.9977751089472529,
|
|
"calibration/confidence_entropy": 0.49877098337715486,
|
|
"calibration/coverage@0%": 0.008203125,
|
|
"calibration/coverage@1%": 0.008203125,
|
|
"calibration/coverage@10%": 0.032421875,
|
|
"calibration/coverage@15%": 0.042578125,
|
|
"calibration/coverage@20%": 0.11484375,
|
|
"calibration/coverage@25%": 0.224609375,
|
|
"calibration/coverage@30%": 0.3953125,
|
|
"calibration/coverage@5%": 0.02109375,
|
|
"calibration/ece": 0.10189078812995633,
|
|
"calibration/mean_confidence": 0.513004817409997,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 464.4,
|
|
"completions/max_terminated_length": 464.4,
|
|
"completions/mean_length": 189.19814453125,
|
|
"completions/mean_terminated_length": 189.2162292480469,
|
|
"completions/min_length": 74.6,
|
|
"completions/min_terminated_length": 91.4,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0008635468548163772,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 433685324.0,
|
|
"reward": 0.9197581887245179,
|
|
"reward_std": 0.10262777209281922,
|
|
"rewards/accgated_coverage_0": 0.02481077201664448,
|
|
"rewards/accgated_coverage_1": 0.02481077201664448,
|
|
"rewards/accgated_coverage_10": 0.02481077201664448,
|
|
"rewards/accgated_coverage_15": 0.02481077201664448,
|
|
"rewards/accgated_coverage_20": 0.02481077201664448,
|
|
"rewards/accgated_coverage_25": 0.02481077201664448,
|
|
"rewards/accgated_coverage_5": 0.02481077201664448,
|
|
"rewards/accuracy_reward": 0.494921875,
|
|
"rewards/brier_reward": 0.7818256974220276,
|
|
"rewards/confidence_uniqueness_reward": 0.9530369281768799,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0033155861776322125,
|
|
"rewards/frontier_ece_reward": 0.010740846581757068,
|
|
"rewards/frontier_entropy_batch_reward": -0.1954037606716156,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.047776888310909274,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06069251298904419,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.047776888310909274,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06069251298904419,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.047776888310909274,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06069251298904419,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.047776888310909274,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06069251298904419,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.047776888310909274,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06069251298904419,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.047776888310909274,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06069251298904419,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.047776888310909274,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06069251298904419,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004777689045295119,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12591552734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.16535796225070953,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.528125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062957763671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062957763671875,
|
|
"signal/advantage_abs_mean": 0.08131872415542603,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08131872415542603,
|
|
"signal/advantage_pre_scale_std": 0.1183522805571556,
|
|
"signal/advantage_std": 0.1183522805571556,
|
|
"signal/brier_reward/centered_abs_mean": 0.14390135705471038,
|
|
"signal/brier_reward/group_std_mean": 0.18296151161193847,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014390136301517486,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014390136301517486,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012548736296594143,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01589704118669033,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012548736296594143,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012548736296594143,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033578477799892426,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005520503781735897,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.19730982684996e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.19730982684996e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.017291248589754105,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02277929149568081,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017291248077526689,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017291248077526689,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2703825652599335,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34669106006622313,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027038257196545602,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027038257196545602,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28322062243077284,
|
|
"calibration/batch_distribution_entropy": 0.9741314135591935,
|
|
"calibration/buffer_distribution_entropy": 0.9977025041244729,
|
|
"calibration/confidence_entropy": 0.4597935578261665,
|
|
"calibration/coverage@0%": 0.011730980919765165,
|
|
"calibration/coverage@1%": 0.011730980919765165,
|
|
"calibration/coverage@10%": 0.09582161203522505,
|
|
"calibration/coverage@15%": 0.17944211717221134,
|
|
"calibration/coverage@20%": 0.3279461227984345,
|
|
"calibration/coverage@25%": 0.41315511863992177,
|
|
"calibration/coverage@30%": 0.5660928326810175,
|
|
"calibration/coverage@5%": 0.011730980919765165,
|
|
"calibration/ece": 0.11671168467835376,
|
|
"calibration/mean_confidence": 0.5372104429192117,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 560.0,
|
|
"completions/max_terminated_length": 560.0,
|
|
"completions/mean_length": 189.170703125,
|
|
"completions/mean_terminated_length": 189.28175354003906,
|
|
"completions/min_length": 35.8,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0016940739005804062,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 450636768.0,
|
|
"reward": 0.9407005071640014,
|
|
"reward_std": 0.09624196439981461,
|
|
"rewards/accgated_coverage_0": 0.030050896108150482,
|
|
"rewards/accgated_coverage_1": 0.030050896108150482,
|
|
"rewards/accgated_coverage_10": 0.030050896108150482,
|
|
"rewards/accgated_coverage_15": 0.030050896108150482,
|
|
"rewards/accgated_coverage_20": 0.030050896108150482,
|
|
"rewards/accgated_coverage_25": 0.030050896108150482,
|
|
"rewards/accgated_coverage_5": 0.030050896108150482,
|
|
"rewards/accuracy_reward": 0.5318359375,
|
|
"rewards/brier_reward": 0.8001113057136535,
|
|
"rewards/confidence_uniqueness_reward": 0.951182758808136,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.002726729493588209,
|
|
"rewards/frontier_ece_reward": 0.012993598543107509,
|
|
"rewards/frontier_entropy_batch_reward": -0.22305963337421417,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.051881562918424606,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06760661378502845,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.051881562918424606,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06760661378502845,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.051881562918424606,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06760661378502845,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.051881562918424606,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06760661378502845,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.051881562918424606,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06760661378502845,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.051881562918424606,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06760661378502845,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.051881562918424606,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06760661378502845,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005188156617805362,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1138427734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.15121517330408096,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.565625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05692138671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05692138671875,
|
|
"signal/advantage_abs_mean": 0.07415008246898651,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07415008246898651,
|
|
"signal/advantage_pre_scale_std": 0.11101247072219848,
|
|
"signal/advantage_std": 0.11101247072219848,
|
|
"signal/brier_reward/centered_abs_mean": 0.1365377575159073,
|
|
"signal/brier_reward/group_std_mean": 0.17550874650478362,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013653775677084923,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013653775677084923,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014425282925367355,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.020028948225080966,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014425283297896385,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014425283297896385,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.0038669900968670845,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003307829750701785,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005367651302367449,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1347873047925535e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1347873047925535e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016411469876766206,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021296811848878862,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001641146931797266,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001641146931797266,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28924007415771485,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3657317876815796,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02892400659620762,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02892400659620762,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31087928071099336,
|
|
"calibration/batch_distribution_entropy": 0.9721029922152737,
|
|
"calibration/buffer_distribution_entropy": 0.9974103057197008,
|
|
"calibration/confidence_entropy": 0.48614372582595544,
|
|
"calibration/coverage@0%": 0.014067086594911937,
|
|
"calibration/coverage@1%": 0.014067086594911937,
|
|
"calibration/coverage@10%": 0.06722954378669276,
|
|
"calibration/coverage@15%": 0.12155546722113501,
|
|
"calibration/coverage@20%": 0.1950617661448141,
|
|
"calibration/coverage@25%": 0.2666187622309198,
|
|
"calibration/coverage@30%": 0.4077566964285714,
|
|
"calibration/coverage@5%": 0.028520211594911936,
|
|
"calibration/ece": 0.13346354883731723,
|
|
"calibration/mean_confidence": 0.5590761304140146,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 668.8,
|
|
"completions/max_terminated_length": 668.8,
|
|
"completions/mean_length": 197.71796875,
|
|
"completions/mean_terminated_length": 197.77550354003907,
|
|
"completions/min_length": 38.8,
|
|
"completions/min_terminated_length": 94.2,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.0009933625115081668,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 467614200.0,
|
|
"reward": 0.9261441349983215,
|
|
"reward_std": 0.09479031711816788,
|
|
"rewards/accgated_coverage_0": 0.029728616401553155,
|
|
"rewards/accgated_coverage_1": 0.029728616401553155,
|
|
"rewards/accgated_coverage_10": 0.029728616401553155,
|
|
"rewards/accgated_coverage_15": 0.029728616401553155,
|
|
"rewards/accgated_coverage_20": 0.029728616401553155,
|
|
"rewards/accgated_coverage_25": 0.02971052788197994,
|
|
"rewards/accgated_coverage_5": 0.029728616401553155,
|
|
"rewards/accuracy_reward": 0.50244140625,
|
|
"rewards/brier_reward": 0.7914562702178956,
|
|
"rewards/confidence_uniqueness_reward": 0.9523193597793579,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0033234899397939443,
|
|
"rewards/frontier_ece_reward": 0.009857317991554737,
|
|
"rewards/frontier_entropy_batch_reward": -0.21060078740119934,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.048491771519184115,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.062291745096445084,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.048491771519184115,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.062291745096445084,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.048491771519184115,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.062291745096445084,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.048491771519184115,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.062291745096445084,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.048491771519184115,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.062291745096445084,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.048326144367456435,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.062085268646478654,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004832614585757256,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004832614585757256,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.048491771519184115,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.062291745096445084,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004849177319556475,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.109490966796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14455284774303437,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0547454833984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0547454833984375,
|
|
"signal/advantage_abs_mean": 0.07401997745037078,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07401997745037078,
|
|
"signal/advantage_pre_scale_std": 0.1096037745475769,
|
|
"signal/advantage_std": 0.1096037745475769,
|
|
"signal/brier_reward/centered_abs_mean": 0.14020991921424866,
|
|
"signal/brier_reward/group_std_mean": 0.1790821671485901,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014020991884171963,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014020991884171963,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013042146898806095,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017226839996874334,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001304214750416577,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001304214750416577,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00343682668171823,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005777542665600777,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2960334394592795e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2960334394592795e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01519781704992056,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.019867125526070596,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015197818167507649,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015197818167507649,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2822464555501938,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3577677130699158,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028224647045135498,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028224647045135498,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4260284897856108,
|
|
"calibration/batch_distribution_entropy": 0.9851075345712529,
|
|
"calibration/buffer_distribution_entropy": 0.996800334802546,
|
|
"calibration/confidence_entropy": 0.49255589906544583,
|
|
"calibration/coverage@0%": 0.004319169944296346,
|
|
"calibration/coverage@1%": 0.004319169944296346,
|
|
"calibration/coverage@10%": 0.017219022306501068,
|
|
"calibration/coverage@15%": 0.029380685692327844,
|
|
"calibration/coverage@20%": 0.06003090714902076,
|
|
"calibration/coverage@25%": 0.08594441600728847,
|
|
"calibration/coverage@30%": 0.1708299967159499,
|
|
"calibration/coverage@5%": 0.004319169944296346,
|
|
"calibration/ece": 0.14126350573298713,
|
|
"calibration/mean_confidence": 0.4790509413100503,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 794.2,
|
|
"completions/max_terminated_length": 794.2,
|
|
"completions/mean_length": 202.572265625,
|
|
"completions/mean_terminated_length": 202.75009155273438,
|
|
"completions/min_length": 20.8,
|
|
"completions/min_terminated_length": 97.4,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0008447124273516238,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 484859356.0,
|
|
"reward": 0.8944996356964111,
|
|
"reward_std": 0.09351845979690551,
|
|
"rewards/accgated_coverage_0": 0.025238432362675665,
|
|
"rewards/accgated_coverage_1": 0.025238432362675665,
|
|
"rewards/accgated_coverage_10": 0.025238432362675665,
|
|
"rewards/accgated_coverage_15": 0.025238432362675665,
|
|
"rewards/accgated_coverage_20": 0.025238432362675665,
|
|
"rewards/accgated_coverage_25": 0.02511085756123066,
|
|
"rewards/accgated_coverage_5": 0.025238432362675665,
|
|
"rewards/accuracy_reward": 0.45126953125,
|
|
"rewards/brier_reward": 0.7709176063537597,
|
|
"rewards/confidence_uniqueness_reward": 0.9511044979095459,
|
|
"rewards/format_reward": 0.99892578125,
|
|
"rewards/frontier_aurc_reward": -0.0037610166240483523,
|
|
"rewards/frontier_ece_reward": 0.007047755550593138,
|
|
"rewards/frontier_entropy_batch_reward": -0.21112147867679595,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.037470953166484834,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.048257572948932646,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.037470953166484834,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.048257572948932646,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.037470953166484834,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.048257572948932646,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.037470953166484834,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.048257572948932646,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.037470953166484834,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.048257572948932646,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.03708973340690136,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.04777633249759674,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.003708973526954651,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.003708973526954651,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.037470953166484834,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.048257572948932646,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0037470953073352577,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.093695068359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1306929975748062,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0468475341796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0468475341796875,
|
|
"signal/advantage_abs_mean": 0.0707702711224556,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0707702711224556,
|
|
"signal/advantage_pre_scale_std": 0.10955794602632522,
|
|
"signal/advantage_std": 0.10955794602632522,
|
|
"signal/brier_reward/centered_abs_mean": 0.14058507978916168,
|
|
"signal/brier_reward/group_std_mean": 0.18082630336284639,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014058507792651654,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014058507792651654,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014609797485172748,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.021237166598439217,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001460979785770178,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001460979785770178,
|
|
"signal/format_reward/centered_abs_mean": 0.002069091796875,
|
|
"signal/format_reward/group_std_mean": 0.005740390298888088,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003325316496193409,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005579947866499424,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.156645809416659e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.156645809416659e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012721736542880535,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01700245440006256,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012721736915409566,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012721736915409566,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2733992040157318,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3470227658748627,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02733992077410221,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02733992077410221,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28795121430498016,
|
|
"calibration/batch_distribution_entropy": 0.975302575403972,
|
|
"calibration/buffer_distribution_entropy": 0.9960413492852078,
|
|
"calibration/confidence_entropy": 0.47715076131859047,
|
|
"calibration/coverage@0%": 0.022699822651663405,
|
|
"calibration/coverage@1%": 0.022699822651663405,
|
|
"calibration/coverage@10%": 0.0724062805772994,
|
|
"calibration/coverage@15%": 0.2818646037181996,
|
|
"calibration/coverage@20%": 0.37693554305283755,
|
|
"calibration/coverage@25%": 0.44147734222113505,
|
|
"calibration/coverage@30%": 0.5005488625244618,
|
|
"calibration/coverage@5%": 0.03718123165362035,
|
|
"calibration/ece": 0.15053341353769256,
|
|
"calibration/mean_confidence": 0.49011041477646156,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00107421875,
|
|
"completions/max_length": 679.8,
|
|
"completions/max_terminated_length": 679.8,
|
|
"completions/mean_length": 202.91337890625,
|
|
"completions/mean_terminated_length": 203.13193969726564,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 99.4,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.0010805472265928984,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0006,
|
|
"num_tokens": 501985221.0,
|
|
"reward": 0.9265949487686157,
|
|
"reward_std": 0.09515149295330047,
|
|
"rewards/accgated_coverage_0": 0.028273304179310798,
|
|
"rewards/accgated_coverage_1": 0.028273304179310798,
|
|
"rewards/accgated_coverage_10": 0.028273304179310798,
|
|
"rewards/accgated_coverage_15": 0.028273304179310798,
|
|
"rewards/accgated_coverage_20": 0.028273304179310798,
|
|
"rewards/accgated_coverage_25": 0.028124842047691345,
|
|
"rewards/accgated_coverage_5": 0.028273304179310798,
|
|
"rewards/accuracy_reward": 0.50966796875,
|
|
"rewards/brier_reward": 0.7756298780441284,
|
|
"rewards/confidence_uniqueness_reward": 0.9513458490371705,
|
|
"rewards/format_reward": 0.99892578125,
|
|
"rewards/frontier_aurc_reward": -0.0029657317558303476,
|
|
"rewards/frontier_ece_reward": 0.007657552417367697,
|
|
"rewards/frontier_entropy_batch_reward": -0.20904635787010192,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.0533129021525383,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.0669943556189537,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.0533129021525383,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.0669943556189537,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.0533129021525383,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.0669943556189537,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0533129021525383,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.0669943556189537,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.0533129021525383,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.0669943556189537,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05262741148471832,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06615648269653321,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005262741353362799,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005262741353362799,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.0533129021525383,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.0669943556189537,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00533129028044641,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.123687744140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15795093774795532,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.565625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0618438720703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0618438720703125,
|
|
"signal/advantage_abs_mean": 0.07340479493141175,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07340479493141175,
|
|
"signal/advantage_pre_scale_std": 0.11030431389808655,
|
|
"signal/advantage_std": 0.11030431389808655,
|
|
"signal/brier_reward/centered_abs_mean": 0.1439410626888275,
|
|
"signal/brier_reward/group_std_mean": 0.1817007005214691,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014394106343388557,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014394106343388557,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014166798628866672,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02084354721009731,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014166798675432802,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014166798675432802,
|
|
"signal/format_reward/centered_abs_mean": 0.002081298828125,
|
|
"signal/format_reward/group_std_mean": 0.006076698750257492,
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002875799732282758,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00463168453425169,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.594749650801532e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.594749650801532e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013105543702840805,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.017056282609701157,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013105543795973063,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013105543795973063,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2596915900707245,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3349157810211182,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025969159603118897,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025969159603118897,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.4669723018480348,
|
|
"eval_calibration/batch_distribution_entropy": 0.919400066519976,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9955915939821773,
|
|
"eval_calibration/confidence_entropy": 0.4693234243555436,
|
|
"eval_calibration/coverage@0%": 0.0625,
|
|
"eval_calibration/coverage@1%": 0.0625,
|
|
"eval_calibration/coverage@10%": 0.0625,
|
|
"eval_calibration/coverage@15%": 0.0625,
|
|
"eval_calibration/coverage@20%": 0.171875,
|
|
"eval_calibration/coverage@25%": 0.1953125,
|
|
"eval_calibration/coverage@30%": 0.2890625,
|
|
"eval_calibration/coverage@5%": 0.0625,
|
|
"eval_calibration/ece": 0.18956857599273347,
|
|
"eval_calibration/mean_confidence": 0.4655063869304471,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 486.0,
|
|
"eval_completions/max_terminated_length": 486.0,
|
|
"eval_completions/mean_length": 202.505859375,
|
|
"eval_completions/mean_terminated_length": 202.505859375,
|
|
"eval_completions/min_length": 109.0,
|
|
"eval_completions/min_terminated_length": 109.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 501985221.0,
|
|
"eval_reward": 0.7895693480968475,
|
|
"eval_reward_std": 0.2150205746293068,
|
|
"eval_rewards/accgated_coverage_0": 0.028790025506168604,
|
|
"eval_rewards/accgated_coverage_1": 0.028790025506168604,
|
|
"eval_rewards/accgated_coverage_10": 0.028790025506168604,
|
|
"eval_rewards/accgated_coverage_15": 0.028790025506168604,
|
|
"eval_rewards/accgated_coverage_20": 0.028790025506168604,
|
|
"eval_rewards/accgated_coverage_25": 0.028504140209406614,
|
|
"eval_rewards/accgated_coverage_5": 0.028790025506168604,
|
|
"eval_rewards/accuracy_reward": 0.400390625,
|
|
"eval_rewards/brier_reward": 0.7909315228462219,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8935546875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.003576983988750726,
|
|
"eval_rewards/frontier_ece_reward": 0.00845679291523993,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 23.3411,
|
|
"eval_samples_per_second": 21.421,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.05911190249025822,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.07272995077073574,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.05911190249025822,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.07272995077073574,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.05911190249025822,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.07272995077073574,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.05911190249025822,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.07272995077073574,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.05911190249025822,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.07272995077073574,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.0584586663171649,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.07195482775568962,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005845866515301168,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005845866515301168,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.05911190249025822,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.07272995077073574,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005911190528422594,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4656982421875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.48971545696258545,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23284912109375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23284912109375,
|
|
"eval_signal/advantage_abs_mean": 0.1962505839765072,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.1962505839765072,
|
|
"eval_signal/advantage_pre_scale_std": 0.21267764642834663,
|
|
"eval_signal/advantage_std": 0.21267764642834663,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19743013009428978,
|
|
"eval_signal/brier_reward/group_std_mean": 0.25094591453671455,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019743012730032206,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019743012730032206,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0434112548828125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05236371420323849,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004341125604696572,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004341125604696572,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004231699742376804,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008058041683398187,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.2896246415912174e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.2896246415912174e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.014492101734504104,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.02034526690840721,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014492101909127086,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014492101909127086,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.171,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.39311472877279846,
|
|
"calibration/batch_distribution_entropy": 0.9818095415642116,
|
|
"calibration/buffer_distribution_entropy": 0.995423724980441,
|
|
"calibration/confidence_entropy": 0.4736793499329807,
|
|
"calibration/coverage@0%": 0.009375,
|
|
"calibration/coverage@1%": 0.009375,
|
|
"calibration/coverage@10%": 0.094140625,
|
|
"calibration/coverage@15%": 0.15859375,
|
|
"calibration/coverage@20%": 0.210546875,
|
|
"calibration/coverage@25%": 0.242578125,
|
|
"calibration/coverage@30%": 0.278515625,
|
|
"calibration/coverage@5%": 0.0234375,
|
|
"calibration/ece": 0.13816226354604164,
|
|
"calibration/mean_confidence": 0.5170572832163598,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 656.8,
|
|
"completions/max_terminated_length": 656.8,
|
|
"completions/mean_length": 201.83125,
|
|
"completions/mean_terminated_length": 201.97061157226562,
|
|
"completions/min_length": 40.6,
|
|
"completions/min_terminated_length": 100.2,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0009272120660170913,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0005,
|
|
"num_tokens": 519359813.0,
|
|
"reward": 0.937790048122406,
|
|
"reward_std": 0.09639699459075927,
|
|
"rewards/accgated_coverage_0": 0.023583621345460416,
|
|
"rewards/accgated_coverage_1": 0.023583621345460416,
|
|
"rewards/accgated_coverage_10": 0.023583621345460416,
|
|
"rewards/accgated_coverage_15": 0.023583621345460416,
|
|
"rewards/accgated_coverage_20": 0.023583448119461538,
|
|
"rewards/accgated_coverage_25": 0.02300034649670124,
|
|
"rewards/accgated_coverage_5": 0.023583621345460416,
|
|
"rewards/accuracy_reward": 0.53125,
|
|
"rewards/brier_reward": 0.7865365624427796,
|
|
"rewards/confidence_uniqueness_reward": 0.953377628326416,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0031201265286654235,
|
|
"rewards/frontier_ece_reward": 0.00805421993136406,
|
|
"rewards/frontier_entropy_batch_reward": -0.18652375936508178,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.050162599235773084,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06499579325318336,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.050162599235773084,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06499579325318336,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.050162599235773084,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06499579325318336,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.050162599235773084,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06499579325318336,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.05015862360596657,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06499083563685418,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005015862174332142,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005015862174332142,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04882904663681984,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06331825703382492,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004882904980331659,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004882904980331659,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.050162599235773084,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06499579325318336,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005016259755939246,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1035400390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1395555779337883,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05177001953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05177001953125,
|
|
"signal/advantage_abs_mean": 0.07500568479299545,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07500568479299545,
|
|
"signal/advantage_pre_scale_std": 0.11144567281007767,
|
|
"signal/advantage_std": 0.11144567281007767,
|
|
"signal/brier_reward/centered_abs_mean": 0.1344868689775467,
|
|
"signal/brier_reward/group_std_mean": 0.173132461309433,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013448686897754669,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013448686897754669,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012536250613629818,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018192836456000804,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012536250753328205,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012536250753328205,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417306780815,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033366796094924213,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005428153648972511,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1708495700731874e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1708495700731874e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012212376296520232,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01619528718292713,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001221237680874765,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001221237680874765,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2617306888103485,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33620988130569457,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026173070073127747,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026173070073127747,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3150095805056489,
|
|
"calibration/batch_distribution_entropy": 0.9817100467714981,
|
|
"calibration/buffer_distribution_entropy": 0.9952144938154825,
|
|
"calibration/confidence_entropy": 0.49986996249011506,
|
|
"calibration/coverage@0%": 0.024613197162426614,
|
|
"calibration/coverage@1%": 0.024613197162426614,
|
|
"calibration/coverage@10%": 0.18164444716242661,
|
|
"calibration/coverage@15%": 0.2511756971624266,
|
|
"calibration/coverage@20%": 0.3281479329745597,
|
|
"calibration/coverage@25%": 0.398120260518591,
|
|
"calibration/coverage@30%": 0.5111217282289628,
|
|
"calibration/coverage@5%": 0.09414444716242662,
|
|
"calibration/ece": 0.1394730920177929,
|
|
"calibration/mean_confidence": 0.5252188455799971,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 805.2,
|
|
"completions/max_terminated_length": 805.2,
|
|
"completions/mean_length": 197.9140625,
|
|
"completions/mean_terminated_length": 198.00892639160156,
|
|
"completions/min_length": 40.2,
|
|
"completions/min_terminated_length": 99.6,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.000822938047349453,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0005,
|
|
"num_tokens": 536532117.0,
|
|
"reward": 0.9397272348403931,
|
|
"reward_std": 0.0966149166226387,
|
|
"rewards/accgated_coverage_0": 0.02687025871127844,
|
|
"rewards/accgated_coverage_1": 0.02687025871127844,
|
|
"rewards/accgated_coverage_10": 0.026870235241949557,
|
|
"rewards/accgated_coverage_15": 0.026869393698871136,
|
|
"rewards/accgated_coverage_20": 0.02685362957417965,
|
|
"rewards/accgated_coverage_25": 0.02521761693060398,
|
|
"rewards/accgated_coverage_5": 0.02687025871127844,
|
|
"rewards/accuracy_reward": 0.52978515625,
|
|
"rewards/brier_reward": 0.7993659853935242,
|
|
"rewards/confidence_uniqueness_reward": 0.953278124332428,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0030142725445330143,
|
|
"rewards/frontier_ece_reward": 0.008514054864645005,
|
|
"rewards/frontier_entropy_batch_reward": -0.19641512036323547,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04578293636441231,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06032358705997467,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004578293673694134,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004578293673694134,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04578293636441231,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06032358705997467,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004578293673694134,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004578293673694134,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.045782843977212904,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.060323466360569,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004578284453600645,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004578284453600645,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.045781036466360094,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06032109335064888,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0045781034044921395,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0045781034044921395,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.045747237652540206,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06027765348553658,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00457472400739789,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00457472400739789,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.042320456355810165,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.055887801200151445,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0042320455890148875,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0042320455890148875,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04578293636441231,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06032358705997467,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004578293673694134,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004578293673694134,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.107086181640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.14316701889038086,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0535430908203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0535430908203125,
|
|
"signal/advantage_abs_mean": 0.07598457634449005,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07598457634449005,
|
|
"signal/advantage_pre_scale_std": 0.11298816949129105,
|
|
"signal/advantage_std": 0.11298816949129105,
|
|
"signal/brier_reward/centered_abs_mean": 0.12746050655841829,
|
|
"signal/brier_reward/group_std_mean": 0.16615513563156128,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012746050581336021,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012746050581336021,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012305822782218456,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01691434346139431,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012305822689086198,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012305822689086198,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034232284408062695,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00573572600260377,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.279035638319328e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.279035638319328e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011389912478625775,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014997617527842521,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011389912338927387,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011389912338927387,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27113571763038635,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3451143801212311,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027113571763038635,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027113571763038635,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22061787661354462,
|
|
"calibration/batch_distribution_entropy": 0.9858973245996058,
|
|
"calibration/buffer_distribution_entropy": 0.9956247437997872,
|
|
"calibration/confidence_entropy": 0.4769876723458033,
|
|
"calibration/coverage@0%": 0.03400807240704501,
|
|
"calibration/coverage@1%": 0.03400807240704501,
|
|
"calibration/coverage@10%": 0.2533459209882583,
|
|
"calibration/coverage@15%": 0.3706259173189824,
|
|
"calibration/coverage@20%": 0.5038940190802348,
|
|
"calibration/coverage@25%": 0.6262154476516635,
|
|
"calibration/coverage@30%": 0.7200090203033268,
|
|
"calibration/coverage@5%": 0.11651862157534247,
|
|
"calibration/ece": 0.10647224595062652,
|
|
"calibration/mean_confidence": 0.51159660174583,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 762.6,
|
|
"completions/max_terminated_length": 762.6,
|
|
"completions/mean_length": 201.21201171875,
|
|
"completions/mean_terminated_length": 201.38941650390626,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 102.2,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0009442372247576714,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 553622064.0,
|
|
"reward": 0.9389090180397034,
|
|
"reward_std": 0.09562487751245499,
|
|
"rewards/accgated_coverage_0": 0.029423439130187034,
|
|
"rewards/accgated_coverage_1": 0.029423439130187034,
|
|
"rewards/accgated_coverage_10": 0.029423421248793602,
|
|
"rewards/accgated_coverage_15": 0.029422985576093196,
|
|
"rewards/accgated_coverage_20": 0.029388283006846906,
|
|
"rewards/accgated_coverage_25": 0.02592604709789157,
|
|
"rewards/accgated_coverage_5": 0.029423439130187034,
|
|
"rewards/accuracy_reward": 0.52666015625,
|
|
"rewards/brier_reward": 0.8011533975601196,
|
|
"rewards/confidence_uniqueness_reward": 0.9519981503486633,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.0027007147902622817,
|
|
"rewards/frontier_ece_reward": 0.007845096942037343,
|
|
"rewards/frontier_entropy_batch_reward": -0.20241765975952147,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.051184893399477,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06596878245472908,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005118489358574152,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005118489358574152,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.051184893399477,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06596878245472908,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005118489358574152,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005118489358574152,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05118483528494835,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06596870943903924,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00511848358437419,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00511848358437419,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0511826254427433,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06596593111753464,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005118262674659491,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005118262674659491,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.051091020554304124,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06585049033164977,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005109101999551058,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005109101999551058,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.043202555179595946,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.05595867335796356,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004320255620405078,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004320255620405078,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.051184893399477,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06596878245472908,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005118489358574152,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005118489358574152,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.121319580078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.15382195711135865,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0606597900390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0606597900390625,
|
|
"signal/advantage_abs_mean": 0.07463176250457763,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07463176250457763,
|
|
"signal/advantage_pre_scale_std": 0.11222641915082932,
|
|
"signal/advantage_std": 0.11222641915082932,
|
|
"signal/brier_reward/centered_abs_mean": 0.12696380019187928,
|
|
"signal/brier_reward/group_std_mean": 0.16435499489307404,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01269638016819954,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01269638016819954,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013721022568643094,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.020104555040597917,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013721022522076964,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013721022522076964,
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
|
"signal/format_reward/group_std_mean": 0.00552427158690989,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030401684809476135,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0051263408735394474,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.800210797635373e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.800210797635373e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009970997087657451,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013070202618837356,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009970997925847769,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009970997925847769,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26574829816818235,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34063880443572997,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026574830710887908,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026574830710887908,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2375771201771093,
|
|
"calibration/batch_distribution_entropy": 0.9782534276807887,
|
|
"calibration/buffer_distribution_entropy": 0.9957714705066371,
|
|
"calibration/confidence_entropy": 0.4679197371894765,
|
|
"calibration/coverage@0%": 0.024221807729941293,
|
|
"calibration/coverage@1%": 0.024221807729941293,
|
|
"calibration/coverage@10%": 0.15002522627201564,
|
|
"calibration/coverage@15%": 0.2863648177592955,
|
|
"calibration/coverage@20%": 0.436377813111546,
|
|
"calibration/coverage@25%": 0.6114573140900196,
|
|
"calibration/coverage@30%": 0.700177348336595,
|
|
"calibration/coverage@5%": 0.08086243272994129,
|
|
"calibration/ece": 0.06541616967629962,
|
|
"calibration/mean_confidence": 0.5300458698977398,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 735.8,
|
|
"completions/max_terminated_length": 735.8,
|
|
"completions/mean_length": 205.46064453125,
|
|
"completions/mean_terminated_length": 205.6419891357422,
|
|
"completions/min_length": 18.6,
|
|
"completions/min_terminated_length": 98.0,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.0008544324082322419,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 570889565.0,
|
|
"reward": 0.9480875849723815,
|
|
"reward_std": 0.09817802309989929,
|
|
"rewards/accgated_coverage_0": 0.024963770434260367,
|
|
"rewards/accgated_coverage_1": 0.024963770434260367,
|
|
"rewards/accgated_coverage_10": 0.024963770434260367,
|
|
"rewards/accgated_coverage_15": 0.02495814934372902,
|
|
"rewards/accgated_coverage_20": 0.024749431759119034,
|
|
"rewards/accgated_coverage_25": 0.020984043180942536,
|
|
"rewards/accgated_coverage_5": 0.024963770434260367,
|
|
"rewards/accuracy_reward": 0.55380859375,
|
|
"rewards/brier_reward": 0.7872178554534912,
|
|
"rewards/confidence_uniqueness_reward": 0.9521996140480041,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.002863905020058155,
|
|
"rewards/frontier_ece_reward": 0.0063414408825337885,
|
|
"rewards/frontier_entropy_batch_reward": -0.19972009658813478,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05637867748737335,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.07339582145214081,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00563786793500185,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00563786793500185,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05637867748737335,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.07339582145214081,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00563786793500185,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00563786793500185,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05637867748737335,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.07339582145214081,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00563786793500185,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00563786793500185,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05637001916766167,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.0733846127986908,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0056370020844042305,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0056370020844042305,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.055931567400693896,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.07282639741897583,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005593156814575196,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005593156814575196,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.043465451896190645,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.0568954698741436,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004346545320004225,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004346545320004225,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05637867748737335,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.07339582145214081,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00563786793500185,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00563786793500185,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.124334716796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1655841737985611,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.528125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0621673583984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0621673583984375,
|
|
"signal/advantage_abs_mean": 0.07526060044765473,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07526060044765473,
|
|
"signal/advantage_pre_scale_std": 0.11205310374498367,
|
|
"signal/advantage_std": 0.11205310374498367,
|
|
"signal/brier_reward/centered_abs_mean": 0.1371670126914978,
|
|
"signal/brier_reward/group_std_mean": 0.17753443121910095,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013716701604425906,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013716701604425906,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013424108549952508,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01916743740439415,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013424108503386379,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013424108503386379,
|
|
"signal/format_reward/centered_abs_mean": 0.001690673828125,
|
|
"signal/format_reward/group_std_mean": 0.004635535972192883,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003059108229354024,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005276176985353232,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.823885344900191e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.823885344900191e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009803688712418079,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012898603454232216,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000980368908494711,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000980368908494711,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26505613327026367,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3396241843700409,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026505614444613457,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026505614444613457,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25894136057077977,
|
|
"calibration/batch_distribution_entropy": 0.9847305140826528,
|
|
"calibration/buffer_distribution_entropy": 0.9956841179631131,
|
|
"calibration/confidence_entropy": 0.4782007518896073,
|
|
"calibration/coverage@0%": 0.06133041829745597,
|
|
"calibration/coverage@1%": 0.10586166829745598,
|
|
"calibration/coverage@10%": 0.248049168297456,
|
|
"calibration/coverage@15%": 0.3293610873287671,
|
|
"calibration/coverage@20%": 0.3966150929549902,
|
|
"calibration/coverage@25%": 0.4638652764187867,
|
|
"calibration/coverage@30%": 0.6123845706947162,
|
|
"calibration/coverage@5%": 0.21445541829745599,
|
|
"calibration/ece": 0.11833698865364059,
|
|
"calibration/mean_confidence": 0.49375283827476124,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 695.6,
|
|
"completions/max_terminated_length": 695.6,
|
|
"completions/mean_length": 209.71845703125,
|
|
"completions/mean_terminated_length": 209.7785430908203,
|
|
"completions/min_length": 63.6,
|
|
"completions/min_terminated_length": 103.2,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0007731578662060201,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 587858490.0,
|
|
"reward": 0.9307716369628907,
|
|
"reward_std": 0.09170439690351487,
|
|
"rewards/accgated_coverage_0": 0.028712420910596847,
|
|
"rewards/accgated_coverage_1": 0.028712420910596847,
|
|
"rewards/accgated_coverage_10": 0.028705807775259017,
|
|
"rewards/accgated_coverage_15": 0.028688276931643487,
|
|
"rewards/accgated_coverage_20": 0.02831815704703331,
|
|
"rewards/accgated_coverage_25": 0.023915531113743782,
|
|
"rewards/accgated_coverage_5": 0.028712420910596847,
|
|
"rewards/accuracy_reward": 0.5115234375,
|
|
"rewards/brier_reward": 0.8015724778175354,
|
|
"rewards/confidence_uniqueness_reward": 0.9525643587112427,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0028625247068703175,
|
|
"rewards/frontier_ece_reward": 0.006507827714085579,
|
|
"rewards/frontier_entropy_batch_reward": -0.20399945378303527,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04774615317583084,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.0612909272313118,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004774615447968244,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004774615447968244,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04774615317583084,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.0612909272313118,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004774615447968244,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004774615447968244,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.047740576416254045,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.061283988505601884,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004774057678878308,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004774057678878308,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04771261513233185,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.061248501390218736,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004771261801943183,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004771261801943183,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04683285281062126,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06014049053192139,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0046832853928208355,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0046832853928208355,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.034598128870129585,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.04468399733304977,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.003459813119843602,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.003459813119843602,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04774615317583084,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.0612909272313118,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004774615447968244,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004774615447968244,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10382080078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14004869312047957,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051910400390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051910400390625,
|
|
"signal/advantage_abs_mean": 0.07106384858489037,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07106384858489037,
|
|
"signal/advantage_pre_scale_std": 0.10787554085254669,
|
|
"signal/advantage_std": 0.10787554085254669,
|
|
"signal/brier_reward/centered_abs_mean": 0.12780316174030304,
|
|
"signal/brier_reward/group_std_mean": 0.16514424681663514,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01278031598776579,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01278031598776579,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012605937756597995,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016831617429852487,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012605937663465738,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012605937663465738,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00284982449375093,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004916798043996096,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5622806171886624e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5622806171886624e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008643861301243306,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011418106593191623,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008643861394375563,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008643861394375563,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2669390320777893,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34026256799697874,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026693902909755707,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026693902909755707,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3135997210381514,
|
|
"calibration/batch_distribution_entropy": 0.9833378524753092,
|
|
"calibration/buffer_distribution_entropy": 0.9960817327032949,
|
|
"calibration/confidence_entropy": 0.48438792514362294,
|
|
"calibration/coverage@0%": 0.015628063725490197,
|
|
"calibration/coverage@1%": 0.015628063725490197,
|
|
"calibration/coverage@10%": 0.0859405637254902,
|
|
"calibration/coverage@15%": 0.1921905637254902,
|
|
"calibration/coverage@20%": 0.27736825980392155,
|
|
"calibration/coverage@25%": 0.38324908088235293,
|
|
"calibration/coverage@30%": 0.48799325980392155,
|
|
"calibration/coverage@5%": 0.015628063725490197,
|
|
"calibration/ece": 0.0884428689859307,
|
|
"calibration/mean_confidence": 0.4981117414839547,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 578.0,
|
|
"completions/max_terminated_length": 578.0,
|
|
"completions/mean_length": 209.08427734375,
|
|
"completions/mean_terminated_length": 209.2507751464844,
|
|
"completions/min_length": 22.4,
|
|
"completions/min_terminated_length": 100.6,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0007929888088256121,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 605186137.0,
|
|
"reward": 0.9259281992912293,
|
|
"reward_std": 0.08852628320455551,
|
|
"rewards/accgated_coverage_0": 0.026113039441406728,
|
|
"rewards/accgated_coverage_1": 0.026113039441406728,
|
|
"rewards/accgated_coverage_10": 0.02611410915851593,
|
|
"rewards/accgated_coverage_15": 0.02612158302217722,
|
|
"rewards/accgated_coverage_20": 0.026075875945389272,
|
|
"rewards/accgated_coverage_25": 0.020272112637758254,
|
|
"rewards/accgated_coverage_5": 0.026113039441406728,
|
|
"rewards/accuracy_reward": 0.50966796875,
|
|
"rewards/brier_reward": 0.7865149855613709,
|
|
"rewards/confidence_uniqueness_reward": 0.9518932700157166,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.00332046071998775,
|
|
"rewards/frontier_ece_reward": 0.005302710318937897,
|
|
"rewards/frontier_entropy_batch_reward": -0.20537003576755525,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04404938668012619,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05746869742870331,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004404938966035843,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004404938966035843,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04404938668012619,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05746869742870331,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004404938966035843,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004404938966035843,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04404330998659134,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.057460909336805345,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004404331091791391,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004404331091791391,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0439866840839386,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.05738692060112953,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004398668650537729,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004398668650537729,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04301303252577782,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.05612108111381531,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004301303531974554,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004301303531974554,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.02978185787796974,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.03898368887603283,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002978185843676329,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002978185843676329,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04404938668012619,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05746869742870331,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004404938966035843,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004404938966035843,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091851806640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12905680239200593,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459259033203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0459259033203125,
|
|
"signal/advantage_abs_mean": 0.06676195412874222,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06676195412874222,
|
|
"signal/advantage_pre_scale_std": 0.10248180478811264,
|
|
"signal/advantage_std": 0.10248180478811264,
|
|
"signal/brier_reward/centered_abs_mean": 0.126788030564785,
|
|
"signal/brier_reward/group_std_mean": 0.16423529982566834,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012678803689777852,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012678803689777852,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013680145144462585,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019239641726017,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013680145610123872,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013680145610123872,
|
|
"signal/format_reward/centered_abs_mean": 0.00150146484375,
|
|
"signal/format_reward/group_std_mean": 0.004083108901977539,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000750732421875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029450747650116684,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004820974357426166,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.681343514472246e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.681343514472246e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008131541311740875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010789497010409831,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008131541428156198,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008131541428156198,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26855767965316774,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3468548893928528,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026855768263339998,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026855768263339998,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2993429486542598,
|
|
"calibration/batch_distribution_entropy": 0.9803370668886563,
|
|
"calibration/buffer_distribution_entropy": 0.9961007246434563,
|
|
"calibration/confidence_entropy": 0.4802297145211024,
|
|
"calibration/coverage@0%": 0.016030149217221134,
|
|
"calibration/coverage@1%": 0.016030149217221134,
|
|
"calibration/coverage@10%": 0.22949639187866927,
|
|
"calibration/coverage@15%": 0.3237639126712329,
|
|
"calibration/coverage@20%": 0.41446229818982394,
|
|
"calibration/coverage@25%": 0.5055933524951076,
|
|
"calibration/coverage@30%": 0.5885044642857142,
|
|
"calibration/coverage@5%": 0.11103993395303327,
|
|
"calibration/ece": 0.1322413696660289,
|
|
"calibration/mean_confidence": 0.4870327745342595,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 644.8,
|
|
"completions/max_terminated_length": 644.8,
|
|
"completions/mean_length": 209.5166015625,
|
|
"completions/mean_terminated_length": 209.68014831542968,
|
|
"completions/min_length": 20.2,
|
|
"completions/min_terminated_length": 101.6,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0008513347711414099,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0006,
|
|
"num_tokens": 622499299.0,
|
|
"reward": 0.9328482508659363,
|
|
"reward_std": 0.08548958897590637,
|
|
"rewards/accgated_coverage_0": 0.03096109293401241,
|
|
"rewards/accgated_coverage_1": 0.03096109293401241,
|
|
"rewards/accgated_coverage_10": 0.030960745736956598,
|
|
"rewards/accgated_coverage_15": 0.03093497231602669,
|
|
"rewards/accgated_coverage_20": 0.030443714559078218,
|
|
"rewards/accgated_coverage_25": 0.02200573980808258,
|
|
"rewards/accgated_coverage_5": 0.03096109293401241,
|
|
"rewards/accuracy_reward": 0.51962890625,
|
|
"rewards/brier_reward": 0.7947103619575501,
|
|
"rewards/confidence_uniqueness_reward": 0.9510635375976563,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0027746261563152074,
|
|
"rewards/frontier_ece_reward": 0.0057756159454584125,
|
|
"rewards/frontier_entropy_batch_reward": -0.22418674528598787,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04694317653775215,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.060206232219934465,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0046943177469074724,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0046943177469074724,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04694317653775215,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.060206232219934465,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0046943177469074724,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0046943177469074724,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.046942750364542006,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06020570695400238,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004694275092333555,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004694275092333555,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04689032584428787,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06014012470841408,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004689032770693302,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004689032770693302,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04522727727890015,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.05803100317716599,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004522727569565177,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004522727569565177,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.0300223208963871,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.03874893710017204,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0030022321734577417,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0030022321734577417,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04694317653775215,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.060206232219934465,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0046943177469074724,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0046943177469074724,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.094110107421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12543713301420212,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0470550537109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0470550537109375,
|
|
"signal/advantage_abs_mean": 0.06528689339756966,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06528689339756966,
|
|
"signal/advantage_pre_scale_std": 0.10016652047634125,
|
|
"signal/advantage_std": 0.10016652047634125,
|
|
"signal/brier_reward/centered_abs_mean": 0.12150534689426422,
|
|
"signal/brier_reward/group_std_mean": 0.1580636113882065,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012150534801185131,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012150534801185131,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01382814794778824,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019691282883286476,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013828148366883397,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013828148366883397,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002671397430822253,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004718466103076935,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.339246795803774e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.339246795803774e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007670730352401733,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010097111575305462,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007670730352401734,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007670730352401734,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28037108182907106,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3554627299308777,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02803710997104645,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02803710997104645,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2385094967467943,
|
|
"calibration/batch_distribution_entropy": 0.954137694554461,
|
|
"calibration/buffer_distribution_entropy": 0.9959365000462033,
|
|
"calibration/confidence_entropy": 0.4544575702132888,
|
|
"calibration/coverage@0%": 0.03711166829745597,
|
|
"calibration/coverage@1%": 0.03711166829745597,
|
|
"calibration/coverage@10%": 0.27123899217221137,
|
|
"calibration/coverage@15%": 0.3876796416340509,
|
|
"calibration/coverage@20%": 0.4662258439334638,
|
|
"calibration/coverage@25%": 0.57524844055773,
|
|
"calibration/coverage@30%": 0.6795751284246576,
|
|
"calibration/coverage@5%": 0.16298694349315068,
|
|
"calibration/ece": 0.09929816570177578,
|
|
"calibration/mean_confidence": 0.4615513737745732,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 580.0,
|
|
"completions/max_terminated_length": 580.0,
|
|
"completions/mean_length": 211.66328125,
|
|
"completions/mean_terminated_length": 211.7886199951172,
|
|
"completions/min_length": 62.8,
|
|
"completions/min_terminated_length": 107.0,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0006291710305958986,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 639666219.0,
|
|
"reward": 0.9342318534851074,
|
|
"reward_std": 0.0812569260597229,
|
|
"rewards/accgated_coverage_0": 0.03671490699052811,
|
|
"rewards/accgated_coverage_1": 0.03671490699052811,
|
|
"rewards/accgated_coverage_10": 0.03670356012880802,
|
|
"rewards/accgated_coverage_15": 0.03666727505624294,
|
|
"rewards/accgated_coverage_20": 0.03530341759324074,
|
|
"rewards/accgated_coverage_25": 0.025559740513563155,
|
|
"rewards/accgated_coverage_5": 0.03671490699052811,
|
|
"rewards/accuracy_reward": 0.5142578125,
|
|
"rewards/brier_reward": 0.8174492716789246,
|
|
"rewards/confidence_uniqueness_reward": 0.9497005462646484,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.002493387321010232,
|
|
"rewards/frontier_ece_reward": 0.006145768519490957,
|
|
"rewards/frontier_entropy_batch_reward": -0.24340350329875945,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04637901484966278,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.059489642083644864,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004637901578098536,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004637901578098536,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04637901484966278,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.059489642083644864,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004637901578098536,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004637901578098536,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.046342677623033526,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05944279730319977,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004634267929941416,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004634267929941416,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04627092853188515,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.05935083627700806,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004627093113958836,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004627093113958836,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04318385422229767,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.055419516563415525,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004318385478109121,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004318385478109121,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.028055806085467337,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.036068766564130786,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002805580664426088,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002805580664426088,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04637901484966278,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.059489642083644864,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004637901578098536,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004637901578098536,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09171142578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12622790932655334,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045855712890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045855712890625,
|
|
"signal/advantage_abs_mean": 0.061551207304000856,
|
|
"signal/advantage_pre_scale_abs_mean": 0.061551207304000856,
|
|
"signal/advantage_pre_scale_std": 0.09523071944713593,
|
|
"signal/advantage_std": 0.09523071944713593,
|
|
"signal/brier_reward/centered_abs_mean": 0.11499268561601639,
|
|
"signal/brier_reward/group_std_mean": 0.1495683193206787,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011499268747866154,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011499268747866154,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01518423892557621,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.020625585690140724,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001518423925153911,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001518423925153911,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145628869533537,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002332291193306446,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003978639096021652,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9153642390156165e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9153642390156165e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007376821059733629,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009607397019863129,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007376821245998144,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007376821245998144,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29461329579353335,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36972410678863527,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02946133129298687,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02946133129298687,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2620595794462538,
|
|
"calibration/batch_distribution_entropy": 0.987211676069754,
|
|
"calibration/buffer_distribution_entropy": 0.995760329457571,
|
|
"calibration/confidence_entropy": 0.48855260610841517,
|
|
"calibration/coverage@0%": 0.026960004892367904,
|
|
"calibration/coverage@1%": 0.026960004892367904,
|
|
"calibration/coverage@10%": 0.17078109711350292,
|
|
"calibration/coverage@15%": 0.25478152519569475,
|
|
"calibration/coverage@20%": 0.3919123043052838,
|
|
"calibration/coverage@25%": 0.4970508194716243,
|
|
"calibration/coverage@30%": 0.6029361851761252,
|
|
"calibration/coverage@5%": 0.05979085127201565,
|
|
"calibration/ece": 0.0907151147042236,
|
|
"calibration/mean_confidence": 0.507227250756811,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00107421875,
|
|
"completions/max_length": 601.6,
|
|
"completions/max_terminated_length": 601.6,
|
|
"completions/mean_length": 211.51748046875,
|
|
"completions/mean_terminated_length": 211.74548950195313,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 103.8,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.0008388682035729289,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0011,
|
|
"num_tokens": 657176062.0,
|
|
"reward": 0.9389870643615723,
|
|
"reward_std": 0.08788901269435882,
|
|
"rewards/accgated_coverage_0": 0.031854826211929324,
|
|
"rewards/accgated_coverage_1": 0.031854826211929324,
|
|
"rewards/accgated_coverage_10": 0.031831147894263265,
|
|
"rewards/accgated_coverage_15": 0.03175428584218025,
|
|
"rewards/accgated_coverage_20": 0.029630653187632562,
|
|
"rewards/accgated_coverage_25": 0.02046764940023422,
|
|
"rewards/accgated_coverage_5": 0.03185361251235008,
|
|
"rewards/accuracy_reward": 0.5208984375,
|
|
"rewards/brier_reward": 0.8073629021644593,
|
|
"rewards/confidence_uniqueness_reward": 0.9527830243110657,
|
|
"rewards/format_reward": 0.99892578125,
|
|
"rewards/frontier_aurc_reward": -0.002760437550023198,
|
|
"rewards/frontier_ece_reward": 0.004892275249585509,
|
|
"rewards/frontier_entropy_batch_reward": -0.18319073915481568,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04739323109388351,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.0609379231929779,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004739323165267706,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004739323165267706,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04739323109388351,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.0609379231929779,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004739323165267706,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004739323165267706,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04734518453478813,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06087752133607864,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00473451865836978,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00473451865836978,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04715350121259689,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06063656434416771,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004715350363403559,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004715350363403559,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04258274808526039,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.054887625575065616,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0042582748923450705,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0042582748923450705,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.02506561353802681,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.03276568688452244,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0025065614376217128,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0025065614376217128,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04739024117588997,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.060934138298034665,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0047390243038535115,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0047390243038535115,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10511474609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.13674385845661163,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052557373046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052557373046875,
|
|
"signal/advantage_abs_mean": 0.06808174103498459,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06808174103498459,
|
|
"signal/advantage_pre_scale_std": 0.10456641763448715,
|
|
"signal/advantage_std": 0.10456641763448715,
|
|
"signal/brier_reward/centered_abs_mean": 0.12090775668621064,
|
|
"signal/brier_reward/group_std_mean": 0.15719686448574066,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012090775556862355,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012090775556862355,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012956660613417626,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01948312222957611,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012956660706549884,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012956660706549884,
|
|
"signal/format_reward/centered_abs_mean": 0.002081298828125,
|
|
"signal/format_reward/group_std_mean": 0.006076698796823621,
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002556943567469716,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004279503040015698,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.196179386577569e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.196179386577569e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006517344154417515,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008778749220073222,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000651734450366348,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000651734450366348,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25290383100509645,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3234200954437256,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025290383026003838,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025290383026003838,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28109282115966183,
|
|
"calibration/batch_distribution_entropy": 0.9724146586954063,
|
|
"calibration/buffer_distribution_entropy": 0.9958038898068187,
|
|
"calibration/confidence_entropy": 0.489667227589128,
|
|
"calibration/coverage@0%": 0.02269141389432485,
|
|
"calibration/coverage@1%": 0.02269141389432485,
|
|
"calibration/coverage@10%": 0.2932477678571429,
|
|
"calibration/coverage@15%": 0.3534368884540117,
|
|
"calibration/coverage@20%": 0.4058073935909981,
|
|
"calibration/coverage@25%": 0.5222603550715305,
|
|
"calibration/coverage@30%": 0.6137110142436148,
|
|
"calibration/coverage@5%": 0.11897321428571428,
|
|
"calibration/ece": 0.16706667744881706,
|
|
"calibration/mean_confidence": 0.5486211101060794,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00166015625,
|
|
"completions/max_length": 734.4,
|
|
"completions/max_terminated_length": 734.4,
|
|
"completions/mean_length": 212.99970703125,
|
|
"completions/mean_terminated_length": 213.35613098144532,
|
|
"completions/min_length": 43.2,
|
|
"completions/min_terminated_length": 104.0,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0009371961350552738,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0011,
|
|
"num_tokens": 674699867.0,
|
|
"reward": 0.9512609839439392,
|
|
"reward_std": 0.08684130012989044,
|
|
"rewards/accgated_coverage_0": 0.02471376843750477,
|
|
"rewards/accgated_coverage_1": 0.02471376843750477,
|
|
"rewards/accgated_coverage_10": 0.024693097919225693,
|
|
"rewards/accgated_coverage_15": 0.024671100080013275,
|
|
"rewards/accgated_coverage_20": 0.022685779072344304,
|
|
"rewards/accgated_coverage_25": 0.016758498549461365,
|
|
"rewards/accgated_coverage_5": 0.024713458120822908,
|
|
"rewards/accuracy_reward": 0.56240234375,
|
|
"rewards/brier_reward": 0.8025913000106811,
|
|
"rewards/confidence_uniqueness_reward": 0.9504665613174439,
|
|
"rewards/format_reward": 0.99833984375,
|
|
"rewards/frontier_aurc_reward": -0.002733389986678958,
|
|
"rewards/frontier_ece_reward": 0.004934624442830682,
|
|
"rewards/frontier_entropy_batch_reward": -0.21170130372047424,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04717938750982285,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06157350316643715,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004717938927933573,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004717938927933573,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04717938750982285,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06157350316643715,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004717938927933573,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004717938927933573,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.047129976004362105,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06151040866971016,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004712997563183307,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004712997563183307,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04697373732924461,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06130784824490547,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004697373416274786,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004697373416274786,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.041337736323475836,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.05399098321795463,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004133773688226938,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004133773688226938,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.023771359771490096,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.03101888746023178,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0023771360516548156,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0023771360516548156,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04717613756656647,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06156923472881317,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00471761361695826,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00471761361695826,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.087603759765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12051970660686492,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0438018798828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0438018798828125,
|
|
"signal/advantage_abs_mean": 0.06671606823801994,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06671606823801994,
|
|
"signal/advantage_pre_scale_std": 0.10374155789613723,
|
|
"signal/advantage_std": 0.10374155789613723,
|
|
"signal/brier_reward/centered_abs_mean": 0.11449979990720749,
|
|
"signal/brier_reward/group_std_mean": 0.14780859649181366,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011449980735778808,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011449980735778808,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014644245617091656,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.020360873267054557,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014644246315583588,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014644246315583588,
|
|
"signal/format_reward/centered_abs_mean": 0.002838134765625,
|
|
"signal/format_reward/group_std_mean": 0.005614831438288092,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014190673828125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0014190673828125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002792434743605554,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004751656157895923,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.490543349471409e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.490543349471409e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006552870571613312,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008730523101985455,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006552870734594762,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006552870734594762,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2739957094192505,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3482341289520264,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02739957198500633,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02739957198500633,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.4598180669050629,
|
|
"eval_calibration/batch_distribution_entropy": 0.9273818230064164,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9958763092805967,
|
|
"eval_calibration/confidence_entropy": 0.493588963586702,
|
|
"eval_calibration/coverage@0%": 0.0625,
|
|
"eval_calibration/coverage@1%": 0.0625,
|
|
"eval_calibration/coverage@10%": 0.0625,
|
|
"eval_calibration/coverage@15%": 0.09375,
|
|
"eval_calibration/coverage@20%": 0.15625,
|
|
"eval_calibration/coverage@25%": 0.234375,
|
|
"eval_calibration/coverage@30%": 0.2421875,
|
|
"eval_calibration/coverage@5%": 0.0625,
|
|
"eval_calibration/ece": 0.21956684336417714,
|
|
"eval_calibration/mean_confidence": 0.4808397541493321,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 668.0,
|
|
"eval_completions/max_terminated_length": 668.0,
|
|
"eval_completions/mean_length": 214.29525756835938,
|
|
"eval_completions/mean_terminated_length": 214.29525756835938,
|
|
"eval_completions/min_length": 119.75,
|
|
"eval_completions/min_terminated_length": 119.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 674699867.0,
|
|
"eval_reward": 0.798967257142067,
|
|
"eval_reward_std": 0.2201063111424446,
|
|
"eval_rewards/accgated_coverage_0": 0.030158083885908127,
|
|
"eval_rewards/accgated_coverage_1": 0.030158083885908127,
|
|
"eval_rewards/accgated_coverage_10": 0.030138885602355003,
|
|
"eval_rewards/accgated_coverage_15": 0.030080335214734077,
|
|
"eval_rewards/accgated_coverage_20": 0.027232197113335133,
|
|
"eval_rewards/accgated_coverage_25": 0.015601862454786897,
|
|
"eval_rewards/accgated_coverage_5": 0.03015622543171048,
|
|
"eval_rewards/accuracy_reward": 0.419921875,
|
|
"eval_rewards/brier_reward": 0.791017934679985,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.902099609375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0034560004714876413,
|
|
"eval_rewards/frontier_ece_reward": 0.003851950401440263,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 28.9615,
|
|
"eval_samples_per_second": 17.264,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.06193764880299568,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.07639571651816368,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0061937650898471475,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0061937650898471475,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.06193764880299568,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.07639571651816368,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0061937650898471475,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0061937650898471475,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.06187119986861944,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.07631925866007805,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006187119870446622,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006187119870446622,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.061731474474072456,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.0761583186686039,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006173147703520954,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006173147703520954,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.05488021858036518,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.06819487921893597,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005488021764904261,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005488021764904261,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.027537908405065536,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.03532271645963192,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002753790933638811,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002753790933638811,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.06193420384079218,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.07639174908399582,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0061934206169098616,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0061934206169098616,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.493278868496418,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875,
|
|
"eval_signal/advantage_abs_mean": 0.20314034819602966,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20314034819602966,
|
|
"eval_signal/advantage_pre_scale_std": 0.2178102284669876,
|
|
"eval_signal/advantage_std": 0.2178102284669876,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1911204643547535,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2394135519862175,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01911204680800438,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01911204680800438,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0389862060546875,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04497408773750067,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038986208382993937,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038986208382993937,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0041074592736549675,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007324753561988473,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.134324328537332e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.134324328537332e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.007128268596716225,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.009997925953939557,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000712826891685836,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000712826891685836,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.138,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4425023850966543,
|
|
"calibration/batch_distribution_entropy": 0.973929548474511,
|
|
"calibration/buffer_distribution_entropy": 0.9959812632830186,
|
|
"calibration/confidence_entropy": 0.517053029578119,
|
|
"calibration/coverage@0%": 0.003520211594911937,
|
|
"calibration/coverage@1%": 0.003520211594911937,
|
|
"calibration/coverage@10%": 0.005868548189823875,
|
|
"calibration/coverage@15%": 0.005868548189823875,
|
|
"calibration/coverage@20%": 0.014860567514677103,
|
|
"calibration/coverage@25%": 0.1172073752446184,
|
|
"calibration/coverage@30%": 0.20900577910958903,
|
|
"calibration/coverage@5%": 0.003520211594911937,
|
|
"calibration/ece": 0.11917904343810719,
|
|
"calibration/mean_confidence": 0.4836532776329685,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 679.8,
|
|
"completions/max_terminated_length": 679.8,
|
|
"completions/mean_length": 213.4392578125,
|
|
"completions/mean_terminated_length": 213.5014862060547,
|
|
"completions/min_length": 43.8,
|
|
"completions/min_terminated_length": 101.2,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0007970785372890532,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 691742029.0,
|
|
"reward": 0.9179041743278503,
|
|
"reward_std": 0.0921265184879303,
|
|
"rewards/accgated_coverage_0": 0.025301176682114603,
|
|
"rewards/accgated_coverage_1": 0.025301176682114603,
|
|
"rewards/accgated_coverage_10": 0.025292183458805084,
|
|
"rewards/accgated_coverage_15": 0.025229696184396744,
|
|
"rewards/accgated_coverage_20": 0.02275848053395748,
|
|
"rewards/accgated_coverage_25": 0.014438183046877384,
|
|
"rewards/accgated_coverage_5": 0.025299759954214095,
|
|
"rewards/accuracy_reward": 0.4931640625,
|
|
"rewards/brier_reward": 0.7853361010551453,
|
|
"rewards/confidence_uniqueness_reward": 0.9532147526741028,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.003199864272028208,
|
|
"rewards/frontier_ece_reward": 0.0035779656376689673,
|
|
"rewards/frontier_entropy_batch_reward": -0.19066329896450043,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.040899327397346495,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05284639969468117,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0040899327024817465,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0040899327024817465,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.040899327397346495,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05284639969468117,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0040899327024817465,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0040899327024817465,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04087764136493206,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05281898975372314,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0040877643041312695,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0040877643041312695,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04076016694307327,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.052670329064130786,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004076016694307327,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004076016694307327,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.036109994351863864,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.046791880205273625,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0036109994165599347,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0036109994165599347,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.01965227909386158,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.025820601359009744,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0019652278628200293,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0019652278628200293,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04089687131345272,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.052843216061592105,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004089687252417207,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004089687252417207,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10579833984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.14109778702259063,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052899169921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052899169921875,
|
|
"signal/advantage_abs_mean": 0.07221986800432205,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07221986800432205,
|
|
"signal/advantage_pre_scale_std": 0.10920778065919876,
|
|
"signal/advantage_std": 0.10920778065919876,
|
|
"signal/brier_reward/centered_abs_mean": 0.12306497395038604,
|
|
"signal/brier_reward/group_std_mean": 0.15778571367263794,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012306497804820538,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012306497804820538,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012087763845920562,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01594572402536869,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012087764218449593,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012087764218449593,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028173317667096855,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004808265902101994,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.521664693835191e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.521664693835191e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0057150271721184255,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007791910413652658,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005715027218684554,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005715027218684554,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.264396995306015,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34044753313064574,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02643970064818859,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02643970064818859,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2870710130201714,
|
|
"calibration/batch_distribution_entropy": 0.9753064420948899,
|
|
"calibration/buffer_distribution_entropy": 0.9963448794536054,
|
|
"calibration/confidence_entropy": 0.4887674307638491,
|
|
"calibration/coverage@0%": 0.00703125,
|
|
"calibration/coverage@1%": 0.00703125,
|
|
"calibration/coverage@10%": 0.15625,
|
|
"calibration/coverage@15%": 0.233984375,
|
|
"calibration/coverage@20%": 0.29375,
|
|
"calibration/coverage@25%": 0.409375,
|
|
"calibration/coverage@30%": 0.518359375,
|
|
"calibration/coverage@5%": 0.080859375,
|
|
"calibration/ece": 0.1298590207189613,
|
|
"calibration/mean_confidence": 0.49373118934714694,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 768.4,
|
|
"completions/max_terminated_length": 768.4,
|
|
"completions/mean_length": 210.81796875,
|
|
"completions/mean_terminated_length": 210.88012084960937,
|
|
"completions/min_length": 39.2,
|
|
"completions/min_terminated_length": 99.8,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.0009423012379556894,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 708814245.0,
|
|
"reward": 0.9310911059379577,
|
|
"reward_std": 0.08257418423891068,
|
|
"rewards/accgated_coverage_0": 0.033774099126458165,
|
|
"rewards/accgated_coverage_1": 0.033774099126458165,
|
|
"rewards/accgated_coverage_10": 0.03377165608108044,
|
|
"rewards/accgated_coverage_15": 0.03372667729854584,
|
|
"rewards/accgated_coverage_20": 0.030913470312952995,
|
|
"rewards/accgated_coverage_25": 0.01957174502313137,
|
|
"rewards/accgated_coverage_5": 0.03377415724098683,
|
|
"rewards/accuracy_reward": 0.51240234375,
|
|
"rewards/brier_reward": 0.7978991866111755,
|
|
"rewards/confidence_uniqueness_reward": 0.950904655456543,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.00275814956985414,
|
|
"rewards/frontier_ece_reward": 0.004061966063454747,
|
|
"rewards/frontier_entropy_batch_reward": -0.2214625895023346,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.047636684775352475,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06049715206027031,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004763668589293957,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004763668589293957,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.047636684775352475,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06049715206027031,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004763668589293957,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004763668589293957,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.047632255405187604,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.060491522401571275,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004763225605711341,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004763225605711341,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04751182347536087,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06034188643097878,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004751182394102216,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004751182394102216,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04096822217106819,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.052256053686141966,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00409682234749198,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00409682234749198,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.022413133084774016,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.028731198236346244,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0022413132712244986,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0022413132712244986,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04763597846031189,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06049617603421211,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004763598088175058,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004763598088175058,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.097918701171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13083914667367935,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0489593505859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0489593505859375,
|
|
"signal/advantage_abs_mean": 0.06339110806584358,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06339110806584358,
|
|
"signal/advantage_pre_scale_std": 0.09667609930038452,
|
|
"signal/advantage_std": 0.09667609930038452,
|
|
"signal/brier_reward/centered_abs_mean": 0.12451072931289672,
|
|
"signal/brier_reward/group_std_mean": 0.15938679575920106,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012451073713600635,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012451073713600635,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014077316224575042,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018521204963326453,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014077316503971816,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014077316503971816,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002460779994726181,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004416647460311651,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.075975109823048e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.075975109823048e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005778457596898079,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007743468787521124,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005778457503765822,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005778457503765822,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27691051959991453,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35814193487167356,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027691051363945007,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027691051363945007,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33490676273829906,
|
|
"calibration/batch_distribution_entropy": 0.9797438124139027,
|
|
"calibration/buffer_distribution_entropy": 0.9964835959383807,
|
|
"calibration/confidence_entropy": 0.49407399575377536,
|
|
"calibration/coverage@0%": 0.011328125,
|
|
"calibration/coverage@1%": 0.011328125,
|
|
"calibration/coverage@10%": 0.120703125,
|
|
"calibration/coverage@15%": 0.2234375,
|
|
"calibration/coverage@20%": 0.312109375,
|
|
"calibration/coverage@25%": 0.3828125,
|
|
"calibration/coverage@30%": 0.563671875,
|
|
"calibration/coverage@5%": 0.043359375,
|
|
"calibration/ece": 0.12613568302739456,
|
|
"calibration/mean_confidence": 0.4738847542957731,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 830.4,
|
|
"completions/max_terminated_length": 830.4,
|
|
"completions/mean_length": 212.3232421875,
|
|
"completions/mean_terminated_length": 212.44719848632812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 105.4,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0009251743904314935,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 725942355.0,
|
|
"reward": 0.9335816979408265,
|
|
"reward_std": 0.08509753495454789,
|
|
"rewards/accgated_coverage_0": 0.026787951961159705,
|
|
"rewards/accgated_coverage_1": 0.026787951961159705,
|
|
"rewards/accgated_coverage_10": 0.026786612719297408,
|
|
"rewards/accgated_coverage_15": 0.026709262281656265,
|
|
"rewards/accgated_coverage_20": 0.02241134848445654,
|
|
"rewards/accgated_coverage_25": 0.01569197904318571,
|
|
"rewards/accgated_coverage_5": 0.026787951961159705,
|
|
"rewards/accuracy_reward": 0.5232421875,
|
|
"rewards/brier_reward": 0.7955889105796814,
|
|
"rewards/confidence_uniqueness_reward": 0.9520182609558105,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.002821239922195673,
|
|
"rewards/frontier_ece_reward": 0.0037543469108641146,
|
|
"rewards/frontier_entropy_batch_reward": -0.20043619871139526,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.052465547993779185,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.066448612511158,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005246554780751467,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005246554780751467,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.052465547993779185,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.066448612511158,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005246554780751467,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005246554780751467,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.052459338679909706,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06644077599048615,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00524593386799097,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00524593386799097,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05215623266994953,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06606373339891433,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005215623416006565,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005215623416006565,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04323282837867737,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.05498237237334251,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0043232828378677365,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0043232828378677365,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.023756309226155282,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.03036804832518101,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0023756310110911727,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0023756310110911727,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.052465547993779185,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.066448612511158,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005246554780751467,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005246554780751467,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1098876953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14553710520267488,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05494384765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05494384765625,
|
|
"signal/advantage_abs_mean": 0.0653716504573822,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0653716504573822,
|
|
"signal/advantage_pre_scale_std": 0.10079272240400314,
|
|
"signal/advantage_std": 0.10079272240400314,
|
|
"signal/brier_reward/centered_abs_mean": 0.12485045194625854,
|
|
"signal/brier_reward/group_std_mean": 0.16001889407634734,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01248504538089037,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01248504538089037,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013444668985903263,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01862073801457882,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013444669311866164,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013444669311866164,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002512581180781126,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004061655001714825,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1407264032168314e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1407264032168314e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005848802160471678,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007683264184743166,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005848802160471678,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005848802160471678,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26048979461193084,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3358060121536255,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02604898065328598,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02604898065328598,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28453259958201943,
|
|
"calibration/batch_distribution_entropy": 0.9729883365829831,
|
|
"calibration/buffer_distribution_entropy": 0.996587227545891,
|
|
"calibration/confidence_entropy": 0.4705053015114881,
|
|
"calibration/coverage@0%": 0.009375,
|
|
"calibration/coverage@1%": 0.009375,
|
|
"calibration/coverage@10%": 0.012890625,
|
|
"calibration/coverage@15%": 0.11836549045988258,
|
|
"calibration/coverage@20%": 0.2461013943248532,
|
|
"calibration/coverage@25%": 0.48790667808219174,
|
|
"calibration/coverage@30%": 0.6070847602739726,
|
|
"calibration/coverage@5%": 0.009375,
|
|
"calibration/ece": 0.1117538420563358,
|
|
"calibration/mean_confidence": 0.5099229643395624,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 730.8,
|
|
"completions/max_terminated_length": 730.8,
|
|
"completions/mean_length": 209.2603515625,
|
|
"completions/mean_terminated_length": 209.3418701171875,
|
|
"completions/min_length": 16.2,
|
|
"completions/min_terminated_length": 101.4,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.001095001120120287,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 742951325.0,
|
|
"reward": 0.9403750419616699,
|
|
"reward_std": 0.08310929387807846,
|
|
"rewards/accgated_coverage_0": 0.03328470177948475,
|
|
"rewards/accgated_coverage_1": 0.03328470177948475,
|
|
"rewards/accgated_coverage_10": 0.03328111469745636,
|
|
"rewards/accgated_coverage_15": 0.03311664015054703,
|
|
"rewards/accgated_coverage_20": 0.02838711105287075,
|
|
"rewards/accgated_coverage_25": 0.018937293067574502,
|
|
"rewards/accgated_coverage_5": 0.03328470177948475,
|
|
"rewards/accuracy_reward": 0.52744140625,
|
|
"rewards/brier_reward": 0.8056316375732422,
|
|
"rewards/confidence_uniqueness_reward": 0.9524389386177063,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0029492788482457398,
|
|
"rewards/frontier_ece_reward": 0.0040812592953443525,
|
|
"rewards/frontier_entropy_batch_reward": -0.20637467503547668,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.045438441634178164,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05873422995209694,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00454384433105588,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00454384433105588,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.045438441634178164,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05873422995209694,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00454384433105588,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00454384433105588,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04543309956789017,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.058727345615625384,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004543309938162566,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004543309938162566,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.045103757083415984,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.05830207094550133,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0045103756710886955,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0045103756710886955,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03521736077964306,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.045604909956455233,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0035217361990362408,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0035217361990362408,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.020598072186112405,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.026666931807994843,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002059807279147208,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002059807279147208,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.045438441634178164,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05873422995209694,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00454384433105588,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00454384433105588,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.086187744140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11752658784389496,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0430938720703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0430938720703125,
|
|
"signal/advantage_abs_mean": 0.06388919427990913,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06388919427990913,
|
|
"signal/advantage_pre_scale_std": 0.09822125136852264,
|
|
"signal/advantage_std": 0.09822125136852264,
|
|
"signal/brier_reward/centered_abs_mean": 0.12010153383016586,
|
|
"signal/brier_reward/group_std_mean": 0.15592622756958008,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012010153383016586,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012010153383016586,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013271934166550637,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018078647926449774,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013271935051307083,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013271935051307083,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029013346415013076,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004842393286526203,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.626668330980465e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.626668330980465e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005852928943932056,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007757721655070782,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005852928617969156,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005852928617969156,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2692374408245087,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.345594185590744,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02692374512553215,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02692374512553215,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24788575025993992,
|
|
"calibration/batch_distribution_entropy": 0.9844987021359772,
|
|
"calibration/buffer_distribution_entropy": 0.9965920491455448,
|
|
"calibration/confidence_entropy": 0.47441804644926594,
|
|
"calibration/coverage@0%": 0.027768010029354206,
|
|
"calibration/coverage@1%": 0.027768010029354206,
|
|
"calibration/coverage@10%": 0.2899714102250489,
|
|
"calibration/coverage@15%": 0.386472602739726,
|
|
"calibration/coverage@20%": 0.4865054733365949,
|
|
"calibration/coverage@25%": 0.5463062622309198,
|
|
"calibration/coverage@30%": 0.6338513637475538,
|
|
"calibration/coverage@5%": 0.14147352005870842,
|
|
"calibration/ece": 0.12980114962746656,
|
|
"calibration/mean_confidence": 0.5243642720011021,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 642.8,
|
|
"completions/max_terminated_length": 642.8,
|
|
"completions/mean_length": 207.5609375,
|
|
"completions/mean_terminated_length": 207.58056030273437,
|
|
"completions/min_length": 80.2,
|
|
"completions/min_terminated_length": 100.8,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0009799289982765913,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 760086605.0,
|
|
"reward": 0.9555520176887512,
|
|
"reward_std": 0.08307535648345947,
|
|
"rewards/accgated_coverage_0": 0.03051256462931633,
|
|
"rewards/accgated_coverage_1": 0.03051256462931633,
|
|
"rewards/accgated_coverage_10": 0.03051130548119545,
|
|
"rewards/accgated_coverage_15": 0.030381328240036965,
|
|
"rewards/accgated_coverage_20": 0.02490551434457302,
|
|
"rewards/accgated_coverage_25": 0.01838742271065712,
|
|
"rewards/accgated_coverage_5": 0.03051256462931633,
|
|
"rewards/accuracy_reward": 0.55830078125,
|
|
"rewards/brier_reward": 0.8164411544799804,
|
|
"rewards/confidence_uniqueness_reward": 0.9524749159812927,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002484840899705887,
|
|
"rewards/frontier_ece_reward": 0.004225656203925609,
|
|
"rewards/frontier_entropy_batch_reward": -0.20405004620552064,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04963188543915749,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06388133987784386,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004963188711553812,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004963188711553812,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04963188543915749,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06388133987784386,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004963188711553812,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004963188711553812,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.049627379328012464,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06387575715780258,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004962737904861569,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004962737904861569,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.049022985994815825,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06312333792448044,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004902298748493195,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004902298748493195,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03493177182972431,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.045493639260530475,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0034931772388517857,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0034931772388517857,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.02032623775303364,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.02634006626904011,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00203262388240546,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00203262388240546,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04963188543915749,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06388133987784386,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004963188711553812,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004963188711553812,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.100262451171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13359598368406295,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0501312255859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0501312255859375,
|
|
"signal/advantage_abs_mean": 0.06428440287709236,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06428440287709236,
|
|
"signal/advantage_pre_scale_std": 0.09776978790760041,
|
|
"signal/advantage_std": 0.09776978790760041,
|
|
"signal/brier_reward/centered_abs_mean": 0.10959625095129014,
|
|
"signal/brier_reward/group_std_mean": 0.1432813137769699,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010959625616669655,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010959625616669655,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012161934934556484,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015593766607344151,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012161935213953257,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012161935213953257,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024530492490157487,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004177492624148726,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.066311764996499e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.066311764996499e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005394628457725048,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007084634527564049,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005394628620706499,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005394628620706499,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.263569763302803,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33859267830848694,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026356976479291916,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026356976479291916,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27484888127610524,
|
|
"calibration/batch_distribution_entropy": 0.9747183047227347,
|
|
"calibration/buffer_distribution_entropy": 0.996566131942201,
|
|
"calibration/confidence_entropy": 0.47252951594941683,
|
|
"calibration/coverage@0%": 0.029343505381604695,
|
|
"calibration/coverage@1%": 0.029343505381604695,
|
|
"calibration/coverage@10%": 0.13293098703522505,
|
|
"calibration/coverage@15%": 0.18258087695694716,
|
|
"calibration/coverage@20%": 0.39551278131115464,
|
|
"calibration/coverage@25%": 0.5018078828277887,
|
|
"calibration/coverage@30%": 0.633496667074364,
|
|
"calibration/coverage@5%": 0.09656616927592955,
|
|
"calibration/ece": 0.12579707874867294,
|
|
"calibration/mean_confidence": 0.5529484226759458,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 582.8,
|
|
"completions/max_terminated_length": 582.8,
|
|
"completions/mean_length": 206.87109375,
|
|
"completions/mean_terminated_length": 206.9926025390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 96.6,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.00072222959715873,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 777144549.0,
|
|
"reward": 0.9504972577095032,
|
|
"reward_std": 0.08325774669647217,
|
|
"rewards/accgated_coverage_0": 0.0285327211022377,
|
|
"rewards/accgated_coverage_1": 0.0285327211022377,
|
|
"rewards/accgated_coverage_10": 0.02853233776986599,
|
|
"rewards/accgated_coverage_15": 0.02839807290583849,
|
|
"rewards/accgated_coverage_20": 0.0235455721616745,
|
|
"rewards/accgated_coverage_25": 0.01733865328133106,
|
|
"rewards/accgated_coverage_5": 0.0285327211022377,
|
|
"rewards/accuracy_reward": 0.554296875,
|
|
"rewards/brier_reward": 0.8032742261886596,
|
|
"rewards/confidence_uniqueness_reward": 0.9524643421173096,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0029948077630251647,
|
|
"rewards/frontier_ece_reward": 0.003622399689629674,
|
|
"rewards/frontier_entropy_batch_reward": -0.20598133206367492,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05004717260599136,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06393795162439346,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005004717502743006,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005004717502743006,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05004717260599136,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06393795162439346,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005004717502743006,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005004717502743006,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.050046234577894214,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06393673494458199,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005004623578861356,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005004623578861356,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.049556747823953626,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.0633281297981739,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004955675080418586,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004955675080418586,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03371400721371174,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.04351087808609009,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0033714008051902054,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0033714008051902054,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.0200997706502676,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.02584904506802559,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002009977027773857,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002009977027773857,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05004717260599136,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06393795162439346,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005004717502743006,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005004717502743006,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08973388671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1211128681898117,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044866943359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044866943359375,
|
|
"signal/advantage_abs_mean": 0.06399662345647812,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06399662345647812,
|
|
"signal/advantage_pre_scale_std": 0.09707808792591095,
|
|
"signal/advantage_std": 0.09707808792591095,
|
|
"signal/brier_reward/centered_abs_mean": 0.11655503362417222,
|
|
"signal/brier_reward/group_std_mean": 0.15057767629623414,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011655503325164318,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011655503325164318,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012995177507400512,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018023890629410743,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012995177647098898,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012995177647098898,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002793784369714558,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004505346901714802,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.492230316624045e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.492230316624045e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005415247846394777,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007059116475284099,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000541524775326252,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000541524775326252,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27687177062034607,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35106891989707945,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027687177062034607,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027687177062034607,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2660196836183052,
|
|
"calibration/batch_distribution_entropy": 0.9692057287077087,
|
|
"calibration/buffer_distribution_entropy": 0.9965957295174226,
|
|
"calibration/confidence_entropy": 0.47005762391068373,
|
|
"calibration/coverage@0%": 0.044140625,
|
|
"calibration/coverage@1%": 0.044140625,
|
|
"calibration/coverage@10%": 0.19375,
|
|
"calibration/coverage@15%": 0.26875,
|
|
"calibration/coverage@20%": 0.378515625,
|
|
"calibration/coverage@25%": 0.50078125,
|
|
"calibration/coverage@30%": 0.623046875,
|
|
"calibration/coverage@5%": 0.087109375,
|
|
"calibration/ece": 0.12177968601544795,
|
|
"calibration/mean_confidence": 0.46668987727826927,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 650.8,
|
|
"completions/max_terminated_length": 650.8,
|
|
"completions/mean_length": 206.2876953125,
|
|
"completions/mean_terminated_length": 206.3489959716797,
|
|
"completions/min_length": 41.4,
|
|
"completions/min_terminated_length": 104.2,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.0007596755749545991,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 794484135.0,
|
|
"reward": 0.9476239919662476,
|
|
"reward_std": 0.08365670591592789,
|
|
"rewards/accgated_coverage_0": 0.02502902615815401,
|
|
"rewards/accgated_coverage_1": 0.02502902615815401,
|
|
"rewards/accgated_coverage_10": 0.02502902615815401,
|
|
"rewards/accgated_coverage_15": 0.02488698102533817,
|
|
"rewards/accgated_coverage_20": 0.018017900735139848,
|
|
"rewards/accgated_coverage_25": 0.014435861306264997,
|
|
"rewards/accgated_coverage_5": 0.02502902615815401,
|
|
"rewards/accuracy_reward": 0.55205078125,
|
|
"rewards/brier_reward": 0.8026696920394898,
|
|
"rewards/confidence_uniqueness_reward": 0.9525475263595581,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.00277663916349411,
|
|
"rewards/frontier_ece_reward": 0.0033977875020354984,
|
|
"rewards/frontier_entropy_batch_reward": -0.19778555929660796,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.048104815930128095,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06266987174749375,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00481048165820539,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00481048165820539,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.048104815930128095,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06266987174749375,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00481048165820539,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00481048165820539,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.048104815930128095,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06266987174749375,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00481048165820539,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00481048165820539,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04780370891094208,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.062286855280399324,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004780371021479368,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004780371021479368,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.034034205600619316,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.044576478004455564,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00340342060662806,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00340342060662806,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.019152706488966942,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.02505219243466854,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.001915270695462823,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.001915270695462823,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.048104815930128095,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06266987174749375,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00481048165820539,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00481048165820539,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091473388671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12521128356456757,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0457366943359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0457366943359375,
|
|
"signal/advantage_abs_mean": 0.06399512514472008,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06399512514472008,
|
|
"signal/advantage_pre_scale_std": 0.09923620074987412,
|
|
"signal/advantage_std": 0.09923620074987412,
|
|
"signal/brier_reward/centered_abs_mean": 0.11026464402675629,
|
|
"signal/brier_reward/group_std_mean": 0.1442580610513687,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011026464402675629,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011026464402675629,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012701518088579177,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017074212618172168,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012701518135145307,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012701518135145307,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024226987501606346,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0040211153216660024,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0283733940450476e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0283733940450476e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004950394947081804,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00653142724186182,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004950395144987851,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004950395144987851,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26609220504760744,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34021600484848025,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026609221845865248,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026609221845865248,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28641378674932866,
|
|
"calibration/batch_distribution_entropy": 0.9841637432428328,
|
|
"calibration/buffer_distribution_entropy": 0.9968625010942876,
|
|
"calibration/confidence_entropy": 0.5034196161851294,
|
|
"calibration/coverage@0%": 0.05117799045988258,
|
|
"calibration/coverage@1%": 0.08711549045988258,
|
|
"calibration/coverage@10%": 0.21445924045988257,
|
|
"calibration/coverage@15%": 0.2980743945694716,
|
|
"calibration/coverage@20%": 0.38250214041095887,
|
|
"calibration/coverage@25%": 0.4681101700097847,
|
|
"calibration/coverage@30%": 0.5697697529354208,
|
|
"calibration/coverage@5%": 0.16211549045988258,
|
|
"calibration/ece": 0.15185750949544422,
|
|
"calibration/mean_confidence": 0.4985759031283967,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 743.0,
|
|
"completions/max_terminated_length": 743.0,
|
|
"completions/mean_length": 212.01640625,
|
|
"completions/mean_terminated_length": 212.0368225097656,
|
|
"completions/min_length": 81.2,
|
|
"completions/min_terminated_length": 103.2,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.0006264409748837352,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 811587887.0,
|
|
"reward": 0.9234783053398132,
|
|
"reward_std": 0.08054482340812683,
|
|
"rewards/accgated_coverage_0": 0.027358325943350793,
|
|
"rewards/accgated_coverage_1": 0.027358325943350793,
|
|
"rewards/accgated_coverage_10": 0.027348564751446248,
|
|
"rewards/accgated_coverage_15": 0.027348769642412663,
|
|
"rewards/accgated_coverage_20": 0.02053585313260555,
|
|
"rewards/accgated_coverage_25": 0.014650637470185756,
|
|
"rewards/accgated_coverage_5": 0.027357107028365135,
|
|
"rewards/accuracy_reward": 0.49853515625,
|
|
"rewards/brier_reward": 0.8120542526245117,
|
|
"rewards/confidence_uniqueness_reward": 0.9526609897613525,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0028295089956372975,
|
|
"rewards/frontier_ece_reward": 0.0036727309226989744,
|
|
"rewards/frontier_entropy_batch_reward": -0.19739624857902527,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04027009829878807,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05210058838129043,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004027010034769773,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004027010034769773,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04027009829878807,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05210058838129043,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004027010034769773,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004027010034769773,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04025917798280716,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05208579152822494,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004025917826220393,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004025917826220393,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.039824848622083665,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.051523523032665254,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003982485039159656,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003982485039159656,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.025421395525336267,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.03290306515991688,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0025421395897865296,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0025421395897865296,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.015859098732471467,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.020310256630182266,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015859099105000496,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015859099105000496,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.040269167721271516,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05209931433200836,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004026917088776827,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004026917088776827,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083489990234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11638489514589309,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0417449951171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0417449951171875,
|
|
"signal/advantage_abs_mean": 0.06172212138772011,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06172212138772011,
|
|
"signal/advantage_pre_scale_std": 0.09572511464357376,
|
|
"signal/advantage_std": 0.09572511464357376,
|
|
"signal/brier_reward/centered_abs_mean": 0.10978365540504456,
|
|
"signal/brier_reward/group_std_mean": 0.1418785959482193,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010978365503251552,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010978365503251552,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011818506009876727,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015079454332590104,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00118185062892735,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00118185062892735,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002496037329547107,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004109069146215916,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.120046822004952e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.120046822004952e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004792108759284019,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006286134757101536,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004792108782567084,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004792108782567084,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2550091713666916,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3290919542312622,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025500917807221413,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025500917807221413,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32671411407310635,
|
|
"calibration/batch_distribution_entropy": 0.9782006759163547,
|
|
"calibration/buffer_distribution_entropy": 0.9969583395265618,
|
|
"calibration/confidence_entropy": 0.4687766027810496,
|
|
"calibration/coverage@0%": 0.02578125,
|
|
"calibration/coverage@1%": 0.02578125,
|
|
"calibration/coverage@10%": 0.170703125,
|
|
"calibration/coverage@15%": 0.25546875,
|
|
"calibration/coverage@20%": 0.308203125,
|
|
"calibration/coverage@25%": 0.359375,
|
|
"calibration/coverage@30%": 0.471875,
|
|
"calibration/coverage@5%": 0.133984375,
|
|
"calibration/ece": 0.14992271913099717,
|
|
"calibration/mean_confidence": 0.5172538257230894,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 745.2,
|
|
"completions/max_terminated_length": 745.2,
|
|
"completions/mean_length": 211.4888671875,
|
|
"completions/mean_terminated_length": 211.57101745605468,
|
|
"completions/min_length": 20.4,
|
|
"completions/min_terminated_length": 101.6,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.000795921718236059,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 828927901.0,
|
|
"reward": 0.9365089654922485,
|
|
"reward_std": 0.0868492677807808,
|
|
"rewards/accgated_coverage_0": 0.02046150788664818,
|
|
"rewards/accgated_coverage_1": 0.02046150788664818,
|
|
"rewards/accgated_coverage_10": 0.020463902130723,
|
|
"rewards/accgated_coverage_15": 0.01999166887253523,
|
|
"rewards/accgated_coverage_20": 0.016766261495649814,
|
|
"rewards/accgated_coverage_25": 0.01441083662211895,
|
|
"rewards/accgated_coverage_5": 0.02046313285827637,
|
|
"rewards/accuracy_reward": 0.53837890625,
|
|
"rewards/brier_reward": 0.7908718585968018,
|
|
"rewards/confidence_uniqueness_reward": 0.9525047183036804,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0030904591083526613,
|
|
"rewards/frontier_ece_reward": 0.0025389259913936256,
|
|
"rewards/frontier_entropy_batch_reward": -0.20339978933334352,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.051776818186044696,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06660348773002625,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00517768207937479,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00517768207937479,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.051776818186044696,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06660348773002625,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00517768207937479,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00517768207937479,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05176556333899498,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06658939719200134,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0051765562500804664,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0051765562500804664,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.050340484082698825,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06477305367588997,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0050340484827756885,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0050340484827756885,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03024843893945217,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.03920280672609806,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0030248438473790885,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0030248438473790885,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.018703461810946464,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.02392947468906641,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0018703461857512594,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0018703461857512594,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05176918432116508,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06659393087029457,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005176918627694249,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005176918627694249,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.105316162109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.13590774238109588,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0526580810546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0526580810546875,
|
|
"signal/advantage_abs_mean": 0.0690420001745224,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0690420001745224,
|
|
"signal/advantage_pre_scale_std": 0.10285361707210541,
|
|
"signal/advantage_std": 0.10285361707210541,
|
|
"signal/brier_reward/centered_abs_mean": 0.11876424252986909,
|
|
"signal/brier_reward/group_std_mean": 0.15292527675628662,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011876424588263036,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011876424588263036,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012877122312784196,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01730199046432972,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001287712249904871,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001287712249904871,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027613468701019883,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004532812442630529,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.451683660387062e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.451683660387062e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004731657728552818,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006212034169584513,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004731657856609672,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004731657856609672,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2753194272518158,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3491884648799896,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02753194384276867,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02753194384276867,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22368977492292014,
|
|
"calibration/batch_distribution_entropy": 0.9765782788641488,
|
|
"calibration/buffer_distribution_entropy": 0.9968784343575339,
|
|
"calibration/confidence_entropy": 0.4779808267726235,
|
|
"calibration/coverage@0%": 0.01875840875733855,
|
|
"calibration/coverage@1%": 0.04922715875733855,
|
|
"calibration/coverage@10%": 0.22823966487279845,
|
|
"calibration/coverage@15%": 0.3611607142857143,
|
|
"calibration/coverage@20%": 0.5086227984344422,
|
|
"calibration/coverage@25%": 0.6490284063111545,
|
|
"calibration/coverage@30%": 0.722919214774951,
|
|
"calibration/coverage@5%": 0.14610215875733856,
|
|
"calibration/ece": 0.11821748282284754,
|
|
"calibration/mean_confidence": 0.4770415499416723,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 582.2,
|
|
"completions/max_terminated_length": 582.2,
|
|
"completions/mean_length": 212.08173828125,
|
|
"completions/mean_terminated_length": 212.14385375976562,
|
|
"completions/min_length": 42.4,
|
|
"completions/min_terminated_length": 105.8,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0008366380352526903,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 846110178.0,
|
|
"reward": 0.9612038016319275,
|
|
"reward_std": 0.07943681925535202,
|
|
"rewards/accgated_coverage_0": 0.03096870370209217,
|
|
"rewards/accgated_coverage_1": 0.03096870370209217,
|
|
"rewards/accgated_coverage_10": 0.03096686936914921,
|
|
"rewards/accgated_coverage_15": 0.029813123494386674,
|
|
"rewards/accgated_coverage_20": 0.022094443440437317,
|
|
"rewards/accgated_coverage_25": 0.01778464615345001,
|
|
"rewards/accgated_coverage_5": 0.030968816578388215,
|
|
"rewards/accuracy_reward": 0.57333984375,
|
|
"rewards/brier_reward": 0.8080012440681458,
|
|
"rewards/confidence_uniqueness_reward": 0.9516256332397461,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0028674704488366844,
|
|
"rewards/frontier_ece_reward": 0.0028016922762617467,
|
|
"rewards/frontier_entropy_batch_reward": -0.20883174240589142,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05066419094800949,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06535674557089806,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005066419020295143,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005066419020295143,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05066419094800949,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06535674557089806,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005066419020295143,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005066419020295143,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.050653228908777236,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06534308791160584,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005065322946757078,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005065322946757078,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04801043793559075,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06195661723613739,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00480104386806488,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00480104386806488,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.02948525659739971,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.03847551196813583,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0029485255479812624,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0029485255479812624,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.01872854121029377,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.024161659926176072,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00187285419087857,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00187285419087857,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05066225081682205,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.0653543896973133,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005066225025802851,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005066225025802851,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.092132568359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11959045678377152,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0460662841796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0460662841796875,
|
|
"signal/advantage_abs_mean": 0.0625168263912201,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0625168263912201,
|
|
"signal/advantage_pre_scale_std": 0.09564936310052871,
|
|
"signal/advantage_std": 0.09564936310052871,
|
|
"signal/brier_reward/centered_abs_mean": 0.10817041993141174,
|
|
"signal/brier_reward/group_std_mean": 0.13942115157842636,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010817042179405689,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010817042179405689,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013105977326631546,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01724378876388073,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001310597755946219,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001310597755946219,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025355865713208915,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004124428937211632,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.169483316014521e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.169483316014521e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00470409793779254,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006183451414108277,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00047040980425663295,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00047040980425663295,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26716206073760984,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33822156190872193,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0267162062227726,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0267162062227726,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.46588043488860353,
|
|
"eval_calibration/batch_distribution_entropy": 0.9194377245046288,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9969822940081324,
|
|
"eval_calibration/confidence_entropy": 0.47392606359196077,
|
|
"eval_calibration/coverage@0%": 0.0390625,
|
|
"eval_calibration/coverage@1%": 0.0390625,
|
|
"eval_calibration/coverage@10%": 0.0390625,
|
|
"eval_calibration/coverage@15%": 0.0390625,
|
|
"eval_calibration/coverage@20%": 0.1953125,
|
|
"eval_calibration/coverage@25%": 0.25,
|
|
"eval_calibration/coverage@30%": 0.328125,
|
|
"eval_calibration/coverage@5%": 0.0390625,
|
|
"eval_calibration/ece": 0.17888477450331003,
|
|
"eval_calibration/mean_confidence": 0.4776080658371867,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 390.0,
|
|
"eval_completions/max_terminated_length": 390.0,
|
|
"eval_completions/mean_length": 217.4969024658203,
|
|
"eval_completions/mean_terminated_length": 217.4969024658203,
|
|
"eval_completions/min_length": 124.75,
|
|
"eval_completions/min_terminated_length": 124.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 846110178.0,
|
|
"eval_reward": 0.8083221763372421,
|
|
"eval_reward_std": 0.2187192626297474,
|
|
"eval_rewards/accgated_coverage_0": 0.03624272719025612,
|
|
"eval_rewards/accgated_coverage_1": 0.03624272719025612,
|
|
"eval_rewards/accgated_coverage_10": 0.03623865591362119,
|
|
"eval_rewards/accgated_coverage_15": 0.0346625130623579,
|
|
"eval_rewards/accgated_coverage_20": 0.022707284428179264,
|
|
"eval_rewards/accgated_coverage_25": 0.013365113409236073,
|
|
"eval_rewards/accgated_coverage_5": 0.03624272719025612,
|
|
"eval_rewards/accuracy_reward": 0.43359375,
|
|
"eval_rewards/brier_reward": 0.8064423203468323,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.89013671875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0033500646241009235,
|
|
"eval_rewards/frontier_ece_reward": 0.0033910262282006443,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 19.9553,
|
|
"eval_samples_per_second": 25.056,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.07163327932357788,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.08617032133042812,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007163328235037625,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007163328235037625,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.07163327932357788,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.08617032133042812,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007163328235037625,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007163328235037625,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.07161696534603834,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.08615143597126007,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007161696790717542,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007161696790717542,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.06835009530186653,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.08244646713137627,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006835010135546327,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006835010135546327,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.039841676130890846,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.04926642868667841,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.003984167706221342,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.003984167706221342,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.019560284446924925,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.02460642997175455,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0019560285727493465,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0019560285727493465,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.07163327932357788,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.08617032133042812,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007163328235037625,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007163328235037625,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.478271484375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49664156883955,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2391357421875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2391357421875,
|
|
"eval_signal/advantage_abs_mean": 0.20329082757234573,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20329082757234573,
|
|
"eval_signal/advantage_pre_scale_std": 0.21654605492949486,
|
|
"eval_signal/advantage_std": 0.21654605492949486,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1918768174946308,
|
|
"eval_signal/brier_reward/group_std_mean": 0.24410802125930786,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01918768184259534,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01918768184259534,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0425872802734375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051558976992964745,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004258728236891329,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004258728236891329,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0042505175224505365,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.00803718576207757,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.313147175911581e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.313147175911581e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.00573124154470861,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.007669370388612151,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005731241835746914,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005731241835746914,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.2,
|
|
"step": 250
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25161858829445716,
|
|
"calibration/batch_distribution_entropy": 0.9604955477620883,
|
|
"calibration/buffer_distribution_entropy": 0.9968190693662825,
|
|
"calibration/confidence_entropy": 0.4511546023403012,
|
|
"calibration/coverage@0%": 0.02621238992172211,
|
|
"calibration/coverage@1%": 0.02621238992172211,
|
|
"calibration/coverage@10%": 0.11419551125244617,
|
|
"calibration/coverage@15%": 0.19276388209393347,
|
|
"calibration/coverage@20%": 0.2979123348825832,
|
|
"calibration/coverage@25%": 0.68237448018591,
|
|
"calibration/coverage@30%": 0.8054488747553815,
|
|
"calibration/coverage@5%": 0.06257873654598825,
|
|
"calibration/ece": 0.13867088896719254,
|
|
"calibration/mean_confidence": 0.5401082680443932,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 639.8,
|
|
"completions/max_terminated_length": 639.8,
|
|
"completions/mean_length": 215.06240234375,
|
|
"completions/mean_terminated_length": 215.10444030761718,
|
|
"completions/min_length": 63.4,
|
|
"completions/min_terminated_length": 105.2,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.0009888941422104836,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 863411585.0,
|
|
"reward": 0.953099250793457,
|
|
"reward_std": 0.08940376788377762,
|
|
"rewards/accgated_coverage_0": 0.019826328055933116,
|
|
"rewards/accgated_coverage_1": 0.019826328055933116,
|
|
"rewards/accgated_coverage_10": 0.01982517270371318,
|
|
"rewards/accgated_coverage_15": 0.01932367868721485,
|
|
"rewards/accgated_coverage_20": 0.01630774438381195,
|
|
"rewards/accgated_coverage_25": 0.014926259219646455,
|
|
"rewards/accgated_coverage_5": 0.019826328055933116,
|
|
"rewards/accuracy_reward": 0.57373046875,
|
|
"rewards/brier_reward": 0.7885306715965271,
|
|
"rewards/confidence_uniqueness_reward": 0.9522530317306519,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.003217545850202441,
|
|
"rewards/frontier_ece_reward": 0.0024582074489444493,
|
|
"rewards/frontier_entropy_batch_reward": -0.20889662504196166,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05354453325271606,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06835601478815079,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005354453343898058,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005354453343898058,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05354453325271606,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06835601478815079,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005354453343898058,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005354453343898058,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05353115946054458,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06833992823958397,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005353116150945425,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005353116150945425,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.05107894092798233,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06524311304092408,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005107894167304039,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005107894167304039,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.0296579971909523,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.03823278471827507,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002965799765661359,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002965799765661359,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.0198915496468544,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.025325778871774673,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0019891550531610847,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0019891550531610847,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05354453325271606,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06835601478815079,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005354453343898058,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005354453343898058,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.107867431640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13981811404228212,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0539337158203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0539337158203125,
|
|
"signal/advantage_abs_mean": 0.07016213089227677,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07016213089227677,
|
|
"signal/advantage_pre_scale_std": 0.10725255310535431,
|
|
"signal/advantage_std": 0.10725255310535431,
|
|
"signal/brier_reward/centered_abs_mean": 0.12337229251861573,
|
|
"signal/brier_reward/group_std_mean": 0.15659765005111695,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012337229400873184,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012337229400873184,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012703911028802395,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016831454075872897,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012703910935670138,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012703910935670138,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00313384085893631,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005052349204197526,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9173011464299635e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9173011464299635e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004800934810191393,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006293817330151797,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00048009351012296976,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00048009351012296976,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2696466028690338,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34208568930625916,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026964660733938217,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026964660733938217,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24765935565406477,
|
|
"calibration/batch_distribution_entropy": 0.9672541614417518,
|
|
"calibration/buffer_distribution_entropy": 0.9964371976653,
|
|
"calibration/confidence_entropy": 0.4697584302475404,
|
|
"calibration/coverage@0%": 0.08489413659299336,
|
|
"calibration/coverage@1%": 0.09467887240512643,
|
|
"calibration/coverage@10%": 0.28215328340240203,
|
|
"calibration/coverage@15%": 0.33649990646943706,
|
|
"calibration/coverage@20%": 0.41351886259640847,
|
|
"calibration/coverage@25%": 0.5323392833160662,
|
|
"calibration/coverage@30%": 0.6261550934586163,
|
|
"calibration/coverage@5%": 0.21994440327500864,
|
|
"calibration/ece": 0.09697334967073587,
|
|
"calibration/mean_confidence": 0.48439683040980785,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0009765625,
|
|
"completions/max_length": 697.6,
|
|
"completions/max_terminated_length": 697.6,
|
|
"completions/mean_length": 221.90908203125,
|
|
"completions/mean_terminated_length": 222.12615051269532,
|
|
"completions/min_length": 43.4,
|
|
"completions/min_terminated_length": 106.4,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0006920217419974506,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0005,
|
|
"num_tokens": 880692286.0,
|
|
"reward": 0.9494648218154907,
|
|
"reward_std": 0.08398343473672867,
|
|
"rewards/accgated_coverage_0": 0.03729419596493244,
|
|
"rewards/accgated_coverage_1": 0.03729419596493244,
|
|
"rewards/accgated_coverage_10": 0.03728927373886108,
|
|
"rewards/accgated_coverage_15": 0.03530678525567055,
|
|
"rewards/accgated_coverage_20": 0.024978424608707427,
|
|
"rewards/accgated_coverage_25": 0.020059302635490894,
|
|
"rewards/accgated_coverage_5": 0.03729419596493244,
|
|
"rewards/accuracy_reward": 0.5490234375,
|
|
"rewards/brier_reward": 0.8231766700744629,
|
|
"rewards/confidence_uniqueness_reward": 0.9490555763244629,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.002319850795902312,
|
|
"rewards/frontier_ece_reward": 0.00325658256188035,
|
|
"rewards/frontier_entropy_batch_reward": -0.2503013014793396,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04462068974971771,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05807742029428482,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004462068993598222,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004462068993598222,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04462068974971771,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05807742029428482,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004462068993598222,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004462068993598222,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04461221098899841,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.058065980672836304,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004461221117526293,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004461221117526293,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04177615866065025,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.054360844939947126,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004177615791559219,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004177615791559219,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.026147307828068733,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.033843887597322465,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0026147309225052597,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0026147309225052597,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.017483064904808998,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.02221609316766262,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0017483065836131572,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0017483065836131572,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04462068974971771,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05807742029428482,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004462068993598222,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004462068993598222,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0932373046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12756348997354508,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04661865234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04661865234375,
|
|
"signal/advantage_abs_mean": 0.06364405304193496,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06364405304193496,
|
|
"signal/advantage_pre_scale_std": 0.10019244253635406,
|
|
"signal/advantage_std": 0.10019244253635406,
|
|
"signal/brier_reward/centered_abs_mean": 0.10104852169752121,
|
|
"signal/brier_reward/group_std_mean": 0.132098488509655,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010104852169752121,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010104852169752121,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01508812140673399,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02120809331536293,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001508812210522592,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001508812210522592,
|
|
"signal/format_reward/centered_abs_mean": 0.00186767578125,
|
|
"signal/format_reward/group_std_mean": 0.00485165468417108,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000933837890625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018905135337263345,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030176178086549045,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3631419753655792e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3631419753655792e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004522326309233904,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005910783167928457,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004522326402366161,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004522326402366161,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29563444256782534,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36747732758522034,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02956344522535801,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02956344522535801,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3271325458178449,
|
|
"calibration/batch_distribution_entropy": 0.9662770451559457,
|
|
"calibration/buffer_distribution_entropy": 0.9963845668571439,
|
|
"calibration/confidence_entropy": 0.4835711156903063,
|
|
"calibration/coverage@0%": 0.008203125,
|
|
"calibration/coverage@1%": 0.008203125,
|
|
"calibration/coverage@10%": 0.185546875,
|
|
"calibration/coverage@15%": 0.237109375,
|
|
"calibration/coverage@20%": 0.294140625,
|
|
"calibration/coverage@25%": 0.475,
|
|
"calibration/coverage@30%": 0.523046875,
|
|
"calibration/coverage@5%": 0.13203125,
|
|
"calibration/ece": 0.14876966321369578,
|
|
"calibration/mean_confidence": 0.5546630362367326,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 968.2,
|
|
"completions/max_terminated_length": 968.2,
|
|
"completions/mean_length": 222.33134765625,
|
|
"completions/mean_terminated_length": 222.5053680419922,
|
|
"completions/min_length": 22.0,
|
|
"completions/min_terminated_length": 108.2,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.0007502317312173545,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 897983327.0,
|
|
"reward": 0.9353123426437377,
|
|
"reward_std": 0.08215008527040482,
|
|
"rewards/accgated_coverage_0": 0.029652020335197447,
|
|
"rewards/accgated_coverage_1": 0.029652020335197447,
|
|
"rewards/accgated_coverage_10": 0.029647645354270936,
|
|
"rewards/accgated_coverage_15": 0.027789150178432465,
|
|
"rewards/accgated_coverage_20": 0.018689888529479504,
|
|
"rewards/accgated_coverage_25": 0.01452134121209383,
|
|
"rewards/accgated_coverage_5": 0.029652020335197447,
|
|
"rewards/accuracy_reward": 0.52490234375,
|
|
"rewards/brier_reward": 0.8078467130661011,
|
|
"rewards/confidence_uniqueness_reward": 0.9516240000724793,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.003087950637564063,
|
|
"rewards/frontier_ece_reward": 0.003099389187991619,
|
|
"rewards/frontier_entropy_batch_reward": -0.20927042365074158,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.03783770278096199,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.050519751757383345,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.003783770464360714,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.003783770464360714,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.03783770278096199,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.050519751757383345,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.003783770464360714,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.003783770464360714,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.03783150315284729,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.0505112536251545,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.003783150389790535,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.003783150389790535,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0356540959328413,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.04766347408294678,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003565409732982516,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003565409732982516,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.021960367262363435,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.029427655786275864,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002196036884561181,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002196036884561181,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.01458423975855112,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.0191590566188097,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0014584239572286606,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0014584239572286606,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.03783770278096199,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.050519751757383345,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.003783770464360714,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.003783770464360714,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.082537841796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11387998014688491,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0412689208984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0412689208984375,
|
|
"signal/advantage_abs_mean": 0.06302751824259759,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06302751824259759,
|
|
"signal/advantage_pre_scale_std": 0.1000214621424675,
|
|
"signal/advantage_std": 0.1000214621424675,
|
|
"signal/brier_reward/centered_abs_mean": 0.10466258078813553,
|
|
"signal/brier_reward/group_std_mean": 0.13769466578960418,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010466258227825164,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010466258227825164,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013133116811513901,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018831767141819,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013133117696270346,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013133117696270346,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417306780815,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002753878058865666,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004520110785961151,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.442347588133998e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.442347588133998e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004349597357213497,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005748180858790875,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043495974387042224,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043495974387042224,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27231648564338684,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34300028085708617,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027231648564338684,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027231648564338684,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2760374266890062,
|
|
"calibration/batch_distribution_entropy": 0.9531517789072197,
|
|
"calibration/buffer_distribution_entropy": 0.9965140749004997,
|
|
"calibration/confidence_entropy": 0.49118011044271037,
|
|
"calibration/coverage@0%": 0.008213062622309197,
|
|
"calibration/coverage@1%": 0.008213062622309197,
|
|
"calibration/coverage@10%": 0.13416554549902152,
|
|
"calibration/coverage@15%": 0.21314747431506847,
|
|
"calibration/coverage@20%": 0.2542097296966732,
|
|
"calibration/coverage@25%": 0.38098244863013697,
|
|
"calibration/coverage@30%": 0.5280416768590999,
|
|
"calibration/coverage@5%": 0.07275180406066536,
|
|
"calibration/ece": 0.13082628746087527,
|
|
"calibration/mean_confidence": 0.6128360015117618,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 956.8,
|
|
"completions/max_terminated_length": 956.8,
|
|
"completions/mean_length": 223.86318359375,
|
|
"completions/mean_terminated_length": 223.99449462890624,
|
|
"completions/min_length": 20.0,
|
|
"completions/min_terminated_length": 110.0,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.0009761390392668545,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 915262502.0,
|
|
"reward": 0.9546477913856506,
|
|
"reward_std": 0.0886962041258812,
|
|
"rewards/accgated_coverage_0": 0.023132944479584694,
|
|
"rewards/accgated_coverage_1": 0.023132944479584694,
|
|
"rewards/accgated_coverage_10": 0.023133278265595435,
|
|
"rewards/accgated_coverage_15": 0.02238629199564457,
|
|
"rewards/accgated_coverage_20": 0.017566022649407386,
|
|
"rewards/accgated_coverage_25": 0.016244550049304963,
|
|
"rewards/accgated_coverage_5": 0.023132944479584694,
|
|
"rewards/accuracy_reward": 0.57900390625,
|
|
"rewards/brier_reward": 0.7994905114173889,
|
|
"rewards/confidence_uniqueness_reward": 0.9505669474601746,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0027686028741300108,
|
|
"rewards/frontier_ece_reward": 0.002419534232467413,
|
|
"rewards/frontier_entropy_batch_reward": -0.24598353505134582,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.05085097923874855,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06552043557167053,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005085097998380661,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005085097998380661,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.05085097923874855,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06552043557167053,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005085097998380661,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005085097998380661,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.050848403573036195,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.0655171237885952,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00508484048768878,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00508484048768878,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04747554138302803,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.061189302057027814,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004747554380446673,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004747554380446673,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.028483838215470313,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.036812521517276764,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0028483838308602572,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0028483838308602572,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.01881438195705414,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.024093781784176826,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0018814382376149297,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0018814382376149297,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.05085097923874855,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06552043557167053,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005085097998380661,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005085097998380661,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.102349853515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13482767790555955,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0511749267578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0511749267578125,
|
|
"signal/advantage_abs_mean": 0.0686701402068138,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0686701402068138,
|
|
"signal/advantage_pre_scale_std": 0.1049189954996109,
|
|
"signal/advantage_std": 0.1049189954996109,
|
|
"signal/brier_reward/centered_abs_mean": 0.11167400926351548,
|
|
"signal/brier_reward/group_std_mean": 0.14373133778572084,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011167401075363159,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011167401075363159,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013636622577905655,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019001100584864615,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00136366228107363,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00136366228107363,
|
|
"signal/format_reward/centered_abs_mean": 0.001312255859375,
|
|
"signal/format_reward/group_std_mean": 0.0035306816920638085,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025581192690879106,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004242032580077648,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.19764920277521e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.19764920277521e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004280299786478281,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005746448040008545,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042802998214028774,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042802998214028774,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2921872317790985,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3625770092010498,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029218722507357596,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029218722507357596,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.37473883274386777,
|
|
"calibration/batch_distribution_entropy": 0.9743970080640647,
|
|
"calibration/buffer_distribution_entropy": 0.996299853626935,
|
|
"calibration/confidence_entropy": 0.46121785606815846,
|
|
"calibration/coverage@0%": 0.017578125,
|
|
"calibration/coverage@1%": 0.017578125,
|
|
"calibration/coverage@10%": 0.046900226272015656,
|
|
"calibration/coverage@15%": 0.07934885640900195,
|
|
"calibration/coverage@20%": 0.1508920927103718,
|
|
"calibration/coverage@25%": 0.2279247186888454,
|
|
"calibration/coverage@30%": 0.3037579500978474,
|
|
"calibration/coverage@5%": 0.019140625,
|
|
"calibration/ece": 0.13767695593114995,
|
|
"calibration/mean_confidence": 0.515725347280679,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 716.4,
|
|
"completions/max_terminated_length": 716.4,
|
|
"completions/mean_length": 220.306640625,
|
|
"completions/mean_terminated_length": 220.3911346435547,
|
|
"completions/min_length": 40.4,
|
|
"completions/min_terminated_length": 102.0,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0008852293249219656,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 932665514.0,
|
|
"reward": 0.924151074886322,
|
|
"reward_std": 0.08492428660392762,
|
|
"rewards/accgated_coverage_0": 0.029650628566741943,
|
|
"rewards/accgated_coverage_1": 0.029650628566741943,
|
|
"rewards/accgated_coverage_10": 0.029650628566741943,
|
|
"rewards/accgated_coverage_15": 0.02788240723311901,
|
|
"rewards/accgated_coverage_20": 0.01842728815972805,
|
|
"rewards/accgated_coverage_25": 0.014962680265307426,
|
|
"rewards/accgated_coverage_5": 0.029650628566741943,
|
|
"rewards/accuracy_reward": 0.5060546875,
|
|
"rewards/brier_reward": 0.8013368129730225,
|
|
"rewards/confidence_uniqueness_reward": 0.9511494755744934,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0028143803123384715,
|
|
"rewards/frontier_ece_reward": 0.0030478714033961296,
|
|
"rewards/frontier_entropy_batch_reward": -0.22137869000434876,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04147433005273342,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05344668477773666,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0041474332101643085,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0041474332101643085,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04147433005273342,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05344668477773666,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0041474332101643085,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0041474332101643085,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04147433005273342,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05344668477773666,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0041474332101643085,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0041474332101643085,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03853954002261162,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.04971724823117256,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0038539541885256766,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0038539541885256766,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.02363501489162445,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.030636246129870415,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002363501605577767,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002363501605577767,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.015382234752178193,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.019744027778506278,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015382234705612063,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015382234705612063,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04147433005273342,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05344668477773666,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0041474332101643085,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0041474332101643085,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0987060546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1300501987338066,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04935302734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04935302734375,
|
|
"signal/advantage_abs_mean": 0.06596999615430832,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06596999615430832,
|
|
"signal/advantage_pre_scale_std": 0.1031409427523613,
|
|
"signal/advantage_std": 0.1031409427523613,
|
|
"signal/brier_reward/centered_abs_mean": 0.11196524053812026,
|
|
"signal/brier_reward/group_std_mean": 0.14593787491321564,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01119652446359396,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01119652446359396,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013332638517022133,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01807792242616415,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013332638889551162,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013332638889551162,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023726322688162327,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003939795168116688,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.965790372400079e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.965790372400079e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004360213689506054,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0056990132667124275,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004360213817562908,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004360213817562908,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27509679198265075,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3451230525970459,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027509679645299913,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027509679645299913,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35513378681848906,
|
|
"calibration/batch_distribution_entropy": 0.9721715794391692,
|
|
"calibration/buffer_distribution_entropy": 0.9958760977657816,
|
|
"calibration/confidence_entropy": 0.47022197464919013,
|
|
"calibration/coverage@0%": 0.00978167808219178,
|
|
"calibration/coverage@1%": 0.00978167808219178,
|
|
"calibration/coverage@10%": 0.04657228473581213,
|
|
"calibration/coverage@15%": 0.1555826504403131,
|
|
"calibration/coverage@20%": 0.2169512903620352,
|
|
"calibration/coverage@25%": 0.3224467954990215,
|
|
"calibration/coverage@30%": 0.47991071428571425,
|
|
"calibration/coverage@5%": 0.010173067514677104,
|
|
"calibration/ece": 0.144891333338042,
|
|
"calibration/mean_confidence": 0.5285554545851768,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 661.8,
|
|
"completions/max_terminated_length": 661.8,
|
|
"completions/mean_length": 217.53076171875,
|
|
"completions/mean_terminated_length": 217.6579162597656,
|
|
"completions/min_length": 20.0,
|
|
"completions/min_terminated_length": 103.2,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.0009623025543987751,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 950003877.0,
|
|
"reward": 0.9346998453140258,
|
|
"reward_std": 0.0807345226407051,
|
|
"rewards/accgated_coverage_0": 0.02446789890527725,
|
|
"rewards/accgated_coverage_1": 0.02446789890527725,
|
|
"rewards/accgated_coverage_10": 0.02446789890527725,
|
|
"rewards/accgated_coverage_15": 0.023514636792242527,
|
|
"rewards/accgated_coverage_20": 0.016649814136326314,
|
|
"rewards/accgated_coverage_25": 0.01559778805822134,
|
|
"rewards/accgated_coverage_5": 0.02446789890527725,
|
|
"rewards/accuracy_reward": 0.53408203125,
|
|
"rewards/brier_reward": 0.8033548951148987,
|
|
"rewards/confidence_uniqueness_reward": 0.9509935498237609,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0031520756892859936,
|
|
"rewards/frontier_ece_reward": 0.0023849430959671735,
|
|
"rewards/frontier_entropy_batch_reward": -0.2299666076898575,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04366839006543159,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.056435997039079665,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004366839025169611,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004366839025169611,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04366839006543159,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.056435997039079665,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004366839025169611,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004366839025169611,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04366839006543159,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.056435997039079665,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004366839025169611,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004366839025169611,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.039437131583690645,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.05089409127831459,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00394371310248971,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00394371310248971,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.024134864658117296,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.03109540343284607,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0024134865030646323,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0024134865030646323,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.016570880077779293,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.020994833111763,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016570880776271223,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016570880776271223,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04366839006543159,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.056435997039079665,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004366839025169611,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004366839025169611,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083636474609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11912869811058044,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0418182373046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0418182373046875,
|
|
"signal/advantage_abs_mean": 0.06075609400868416,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06075609400868416,
|
|
"signal/advantage_pre_scale_std": 0.09686070084571838,
|
|
"signal/advantage_std": 0.09686070084571838,
|
|
"signal/brier_reward/centered_abs_mean": 0.10437444597482681,
|
|
"signal/brier_reward/group_std_mean": 0.13576384782791137,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010437444783747196,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010437444783747196,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01360289491713047,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01908070743083954,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013602895196527243,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013602895196527243,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.0038669900968670845,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026476346887648106,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004367242194712162,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3095434264396315e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3095434264396315e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004144516214728356,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005457306373864412,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004144516307860613,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004144516307860613,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2829400360584259,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3546809792518616,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02829400487244129,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02829400487244129,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.38707537310481943,
|
|
"calibration/batch_distribution_entropy": 0.9719699729956378,
|
|
"calibration/buffer_distribution_entropy": 0.995588973701375,
|
|
"calibration/confidence_entropy": 0.4871019202027833,
|
|
"calibration/coverage@0%": 0.022777498904263376,
|
|
"calibration/coverage@1%": 0.022777498904263376,
|
|
"calibration/coverage@10%": 0.0830656638193534,
|
|
"calibration/coverage@15%": 0.17503398795987296,
|
|
"calibration/coverage@20%": 0.27094699097747393,
|
|
"calibration/coverage@25%": 0.3676189943540729,
|
|
"calibration/coverage@30%": 0.42396939632841724,
|
|
"calibration/coverage@5%": 0.027885553914086557,
|
|
"calibration/ece": 0.14931698159648296,
|
|
"calibration/mean_confidence": 0.5074117001231717,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0013671875,
|
|
"completions/max_length": 839.2,
|
|
"completions/max_terminated_length": 839.2,
|
|
"completions/mean_length": 219.380078125,
|
|
"completions/mean_terminated_length": 219.6826599121094,
|
|
"completions/min_length": 37.4,
|
|
"completions/min_terminated_length": 102.6,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.0010746036423370242,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 967301625.0,
|
|
"reward": 0.9356503963470459,
|
|
"reward_std": 0.08552553951740265,
|
|
"rewards/accgated_coverage_0": 0.023921893909573555,
|
|
"rewards/accgated_coverage_1": 0.023921893909573555,
|
|
"rewards/accgated_coverage_10": 0.023920951783657073,
|
|
"rewards/accgated_coverage_15": 0.02283487160457298,
|
|
"rewards/accgated_coverage_20": 0.016963693872094156,
|
|
"rewards/accgated_coverage_25": 0.01679763663560152,
|
|
"rewards/accgated_coverage_5": 0.023921893909573555,
|
|
"rewards/accuracy_reward": 0.5359375,
|
|
"rewards/brier_reward": 0.8047610640525817,
|
|
"rewards/confidence_uniqueness_reward": 0.9503351330757142,
|
|
"rewards/format_reward": 0.9986328125,
|
|
"rewards/frontier_aurc_reward": -0.0026335842441767452,
|
|
"rewards/frontier_ece_reward": 0.00246874107979238,
|
|
"rewards/frontier_entropy_batch_reward": -0.22586590945720672,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04468504786491394,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05757425650954247,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004468504665419459,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004468504665419459,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04468504786491394,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05757425650954247,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004468504665419459,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004468504665419459,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.044683948159217834,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.057572783529758455,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004468394769355654,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004468394769355654,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0402244932949543,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.0519582524895668,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004022449580952525,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004022449580952525,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.02362305298447609,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.03058423213660717,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0023623052751645447,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0023623052751645447,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.016879346594214438,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.021654066629707813,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.001687934761866927,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.001687934761866927,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04468504786491394,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05757425650954247,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004468504665419459,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004468504665419459,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08746337890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12233798801898957,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043731689453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.043731689453125,
|
|
"signal/advantage_abs_mean": 0.06453572064638138,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06453572064638138,
|
|
"signal/advantage_pre_scale_std": 0.10137254744768143,
|
|
"signal/advantage_std": 0.10137254744768143,
|
|
"signal/brier_reward/centered_abs_mean": 0.11355163305997848,
|
|
"signal/brier_reward/group_std_mean": 0.14732645452022552,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011355163529515266,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011355163529515266,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014806121587753296,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02111569344997406,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014806122286245226,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014806122286245226,
|
|
"signal/format_reward/centered_abs_mean": 0.0025634765625,
|
|
"signal/format_reward/group_std_mean": 0.005934012494981289,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00128173828125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00128173828125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002372403466142714,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004163792729377747,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9655042089871132e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9655042089871132e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004044037964195013,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005374080128967762,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000404403789434582,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000404403789434582,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28103450536727903,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35467966794967654,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028103450685739516,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028103450685739516,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3867435653616995,
|
|
"calibration/batch_distribution_entropy": 0.9813316739371245,
|
|
"calibration/buffer_distribution_entropy": 0.9959011772524413,
|
|
"calibration/confidence_entropy": 0.49522539173686414,
|
|
"calibration/coverage@0%": 0.007423403864970646,
|
|
"calibration/coverage@1%": 0.007423403864970646,
|
|
"calibration/coverage@10%": 0.019532778864970644,
|
|
"calibration/coverage@15%": 0.03164826932485323,
|
|
"calibration/coverage@20%": 0.1396342954990215,
|
|
"calibration/coverage@25%": 0.23307393590998043,
|
|
"calibration/coverage@30%": 0.30033252813111544,
|
|
"calibration/coverage@5%": 0.007423403864970646,
|
|
"calibration/ece": 0.1281636041486433,
|
|
"calibration/mean_confidence": 0.4964639264819253,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 716.8,
|
|
"completions/max_terminated_length": 716.8,
|
|
"completions/mean_length": 214.851953125,
|
|
"completions/mean_terminated_length": 214.97773132324218,
|
|
"completions/min_length": 22.0,
|
|
"completions/min_terminated_length": 97.8,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0007004987564869225,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0007,
|
|
"num_tokens": 984528525.0,
|
|
"reward": 0.9267191290855408,
|
|
"reward_std": 0.08182481080293655,
|
|
"rewards/accgated_coverage_0": 0.027588574960827828,
|
|
"rewards/accgated_coverage_1": 0.027588574960827828,
|
|
"rewards/accgated_coverage_10": 0.027588574960827828,
|
|
"rewards/accgated_coverage_15": 0.025525929778814314,
|
|
"rewards/accgated_coverage_20": 0.017605995759367943,
|
|
"rewards/accgated_coverage_25": 0.015273153223097325,
|
|
"rewards/accgated_coverage_5": 0.027588574960827828,
|
|
"rewards/accuracy_reward": 0.51845703125,
|
|
"rewards/brier_reward": 0.7924355387687683,
|
|
"rewards/confidence_uniqueness_reward": 0.9507668256759644,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0030933755449950697,
|
|
"rewards/frontier_ece_reward": 0.002328445459716022,
|
|
"rewards/frontier_entropy_batch_reward": -0.23606752157211303,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04182121828198433,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05356579944491387,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004182121716439724,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004182121716439724,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04182121828198433,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05356579944491387,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004182121716439724,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004182121716439724,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04182121828198433,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05356579944491387,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004182121716439724,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004182121716439724,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03778692409396171,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.048517832159996034,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0037786925211548807,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0037786925211548807,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.022749120369553566,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.029339329153299332,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0022749120369553568,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0022749120369553568,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.016079240664839744,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.02050723284482956,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016079240944236518,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016079240944236518,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04182121828198433,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05356579944491387,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004182121716439724,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004182121716439724,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.086602783203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11836623698472977,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0433013916015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0433013916015625,
|
|
"signal/advantage_abs_mean": 0.06185290068387985,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06185290068387985,
|
|
"signal/advantage_pre_scale_std": 0.09768745750188827,
|
|
"signal/advantage_std": 0.09768745750188827,
|
|
"signal/brier_reward/centered_abs_mean": 0.11498722583055496,
|
|
"signal/brier_reward/group_std_mean": 0.14888761341571807,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011498722806572914,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011498722806572914,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014011159539222717,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01889067105948925,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014011159539222718,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014011159539222718,
|
|
"signal/format_reward/centered_abs_mean": 0.001123046875,
|
|
"signal/format_reward/group_std_mean": 0.0029782545287162067,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0005615234375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027191273402422667,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004544518515467643,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3989092116826214e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3989092116826214e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0042263313196599485,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005566684249788523,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004226331366226077,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004226331366226077,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28662583231925964,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3584433555603027,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028662583604454995,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028662583604454995,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26530272496950286,
|
|
"calibration/batch_distribution_entropy": 0.9849073126919665,
|
|
"calibration/buffer_distribution_entropy": 0.9962016302239295,
|
|
"calibration/confidence_entropy": 0.5012886823644893,
|
|
"calibration/coverage@0%": 0.022666196615632555,
|
|
"calibration/coverage@1%": 0.022666196615632555,
|
|
"calibration/coverage@10%": 0.1883402162234757,
|
|
"calibration/coverage@15%": 0.2782153469264418,
|
|
"calibration/coverage@20%": 0.3884802002992978,
|
|
"calibration/coverage@25%": 0.4768226708491616,
|
|
"calibration/coverage@30%": 0.5859547402008749,
|
|
"calibration/coverage@5%": 0.07540057161563256,
|
|
"calibration/ece": 0.08265569427508188,
|
|
"calibration/mean_confidence": 0.48410358096480427,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 660.8,
|
|
"completions/max_terminated_length": 660.8,
|
|
"completions/mean_length": 213.1173828125,
|
|
"completions/mean_terminated_length": 213.2224548339844,
|
|
"completions/min_length": 64.0,
|
|
"completions/min_terminated_length": 106.2,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0007865950465202332,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 1001686271.0,
|
|
"reward": 0.9313256740570068,
|
|
"reward_std": 0.08973944038152695,
|
|
"rewards/accgated_coverage_0": 0.025818699225783347,
|
|
"rewards/accgated_coverage_1": 0.025818699225783347,
|
|
"rewards/accgated_coverage_10": 0.025818699225783347,
|
|
"rewards/accgated_coverage_15": 0.024165811762213708,
|
|
"rewards/accgated_coverage_20": 0.01818331703543663,
|
|
"rewards/accgated_coverage_25": 0.014625198766589164,
|
|
"rewards/accgated_coverage_5": 0.025818699225783347,
|
|
"rewards/accuracy_reward": 0.5248046875,
|
|
"rewards/brier_reward": 0.798682701587677,
|
|
"rewards/confidence_uniqueness_reward": 0.9513814806938171,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0027979562990367414,
|
|
"rewards/frontier_ece_reward": 0.0022492259275168182,
|
|
"rewards/frontier_entropy_batch_reward": -0.22004973590373994,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.044853395968675616,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05786952823400497,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00448533957824111,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00448533957824111,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.044853395968675616,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05786952823400497,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00448533957824111,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00448533957824111,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.044853395968675616,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05786952823400497,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00448533957824111,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00448533957824111,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04129325300455093,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.05330209955573082,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00412932513281703,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00412932513281703,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.024261708557605743,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.031504085287451744,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0024261708138510587,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0024261708138510587,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.016015125811100005,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.020626705139875412,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016015126369893552,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016015126369893552,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.044853395968675616,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05786952823400497,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00448533957824111,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00448533957824111,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1136474609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.148611381649971,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05682373046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05682373046875,
|
|
"signal/advantage_abs_mean": 0.06972624510526657,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06972624510526657,
|
|
"signal/advantage_pre_scale_std": 0.10730479061603546,
|
|
"signal/advantage_std": 0.10730479061603546,
|
|
"signal/brier_reward/centered_abs_mean": 0.11074768900871276,
|
|
"signal/brier_reward/group_std_mean": 0.14232046902179718,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011074769496917724,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011074769496917724,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013354774564504623,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018452975898981094,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013354774564504623,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013354774564504623,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021938590798527002,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003584741707891226,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7423238498158754e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7423238498158754e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004127887263894081,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005519901774823666,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00041278875432908534,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00041278875432908534,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.279949414730072,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3534364700317383,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02799494154751301,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02799494154751301,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34402592275438887,
|
|
"calibration/batch_distribution_entropy": 0.9760305696696996,
|
|
"calibration/buffer_distribution_entropy": 0.9963368302840016,
|
|
"calibration/confidence_entropy": 0.4657364091598736,
|
|
"calibration/coverage@0%": 0.007421875,
|
|
"calibration/coverage@1%": 0.007421875,
|
|
"calibration/coverage@10%": 0.065625,
|
|
"calibration/coverage@15%": 0.211328125,
|
|
"calibration/coverage@20%": 0.265234375,
|
|
"calibration/coverage@25%": 0.3265625,
|
|
"calibration/coverage@30%": 0.37265625,
|
|
"calibration/coverage@5%": 0.011328125,
|
|
"calibration/ece": 0.14689679746783818,
|
|
"calibration/mean_confidence": 0.5482983593374213,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 907.2,
|
|
"completions/max_terminated_length": 907.2,
|
|
"completions/mean_length": 208.171484375,
|
|
"completions/mean_terminated_length": 208.1913269042969,
|
|
"completions/min_length": 82.8,
|
|
"completions/min_terminated_length": 103.8,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0008087375317700207,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1018758267.0,
|
|
"reward": 0.9315295815467834,
|
|
"reward_std": 0.07846418023109436,
|
|
"rewards/accgated_coverage_0": 0.02990303039550781,
|
|
"rewards/accgated_coverage_1": 0.02990303039550781,
|
|
"rewards/accgated_coverage_10": 0.029898150265216826,
|
|
"rewards/accgated_coverage_15": 0.028139904513955115,
|
|
"rewards/accgated_coverage_20": 0.019901422411203386,
|
|
"rewards/accgated_coverage_25": 0.01664300709962845,
|
|
"rewards/accgated_coverage_5": 0.02990303039550781,
|
|
"rewards/accuracy_reward": 0.51904296875,
|
|
"rewards/brier_reward": 0.8051493167877197,
|
|
"rewards/confidence_uniqueness_reward": 0.9509793639183044,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.003521607397124171,
|
|
"rewards/frontier_ece_reward": 0.002893084893003106,
|
|
"rewards/frontier_entropy_batch_reward": -0.22230381965637208,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.03826850652694702,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05015629380941391,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0038268506061285732,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0038268506061285732,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.03826850652694702,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05015629380941391,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0038268506061285732,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0038268506061285732,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.03825867623090744,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05014391764998436,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0038258675020188095,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0038258675020188095,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03575834967195988,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.0468507744371891,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0035758350044488908,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0035758350044488908,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.021050278469920157,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.02743927575647831,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0021050279028713703,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0021050279028713703,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.015643270313739778,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.01996114067733288,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015643270453438163,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015643270453438163,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.03826850652694702,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05015629380941391,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0038268506061285732,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0038268506061285732,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085845947265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11925376802682877,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0429229736328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0429229736328125,
|
|
"signal/advantage_abs_mean": 0.06013981848955154,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06013981848955154,
|
|
"signal/advantage_pre_scale_std": 0.09505997449159623,
|
|
"signal/advantage_std": 0.09505997449159623,
|
|
"signal/brier_reward/centered_abs_mean": 0.10594068318605424,
|
|
"signal/brier_reward/group_std_mean": 0.1382671058177948,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01059406865388155,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01059406865388155,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012687204778194428,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016237646527588367,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012687204871326685,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012687204871326685,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029484509490430356,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004800262581557036,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.685563715407625e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.685563715407625e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004553208034485579,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005956902913749218,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00045532081858254967,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00045532081858254967,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26913765668869016,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3415271699428558,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02691376656293869,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02691376656293869,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.44108417363651087,
|
|
"eval_calibration/batch_distribution_entropy": 0.947659286598127,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9959698349464184,
|
|
"eval_calibration/confidence_entropy": 0.4709155969582669,
|
|
"eval_calibration/coverage@0%": 0.0625,
|
|
"eval_calibration/coverage@1%": 0.0625,
|
|
"eval_calibration/coverage@10%": 0.125,
|
|
"eval_calibration/coverage@15%": 0.1328125,
|
|
"eval_calibration/coverage@20%": 0.2109375,
|
|
"eval_calibration/coverage@25%": 0.2265625,
|
|
"eval_calibration/coverage@30%": 0.28125,
|
|
"eval_calibration/coverage@5%": 0.0625,
|
|
"eval_calibration/ece": 0.2087351441017339,
|
|
"eval_calibration/mean_confidence": 0.5224578841219565,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 461.0,
|
|
"eval_completions/max_terminated_length": 461.0,
|
|
"eval_completions/mean_length": 207.15261459350586,
|
|
"eval_completions/mean_terminated_length": 207.15261459350586,
|
|
"eval_completions/min_length": 110.5,
|
|
"eval_completions/min_terminated_length": 110.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1018758267.0,
|
|
"eval_reward": 0.8045169711112976,
|
|
"eval_reward_std": 0.22579142451286316,
|
|
"eval_rewards/accgated_coverage_0": 0.033579444978386164,
|
|
"eval_rewards/accgated_coverage_1": 0.033579444978386164,
|
|
"eval_rewards/accgated_coverage_10": 0.03355382476001978,
|
|
"eval_rewards/accgated_coverage_15": 0.03188042528927326,
|
|
"eval_rewards/accgated_coverage_20": 0.01990594994276762,
|
|
"eval_rewards/accgated_coverage_25": 0.011771299876272678,
|
|
"eval_rewards/accgated_coverage_5": 0.033579444978386164,
|
|
"eval_rewards/accuracy_reward": 0.4296875,
|
|
"eval_rewards/brier_reward": 0.7986667156219482,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.89794921875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.004401608370244503,
|
|
"eval_rewards/frontier_ece_reward": 0.0028167355339974165,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 22.9836,
|
|
"eval_samples_per_second": 21.755,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.06548797804862261,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.07980928383767605,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006548797828145325,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006548797828145325,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.06548797804862261,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.07980928383767605,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006548797828145325,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006548797828145325,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.06543731037527323,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.07975173369050026,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0065437310840934515,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0065437310840934515,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.062019459903240204,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.07585378549993038,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006201946176588535,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006201946176588535,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.03423011302947998,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.042966075241565704,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0034230112796649337,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0034230112796649337,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.01742625329643488,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.022265508770942688,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0017426253180019557,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0017426253180019557,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.06548797804862261,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.07980928383767605,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006548797828145325,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006548797828145325,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.473388671875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49401039630174637,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2366943359375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2366943359375,
|
|
"eval_signal/advantage_abs_mean": 0.20914247632026672,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20914247632026672,
|
|
"eval_signal/advantage_pre_scale_std": 0.22344782203435898,
|
|
"eval_signal/advantage_std": 0.22344782203435898,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19477688893675804,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2502768486738205,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019477689173072577,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019477689173072577,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0395965576171875,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04632946569472551,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003959655878134072,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003959655878134072,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0058913074899464846,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.011559756007045507,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.364134944509715e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.364134944509715e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0058010019129142165,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.008047543233260512,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005801001680083573,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005801001680083573,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.174,
|
|
"step": 300
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2518197302031364,
|
|
"calibration/batch_distribution_entropy": 0.9684625187956735,
|
|
"calibration/buffer_distribution_entropy": 0.9958406031055667,
|
|
"calibration/confidence_entropy": 0.49119229362433725,
|
|
"calibration/coverage@0%": 0.048463541666666665,
|
|
"calibration/coverage@1%": 0.048463541666666665,
|
|
"calibration/coverage@10%": 0.28565410539215685,
|
|
"calibration/coverage@15%": 0.4068658088235294,
|
|
"calibration/coverage@20%": 0.4940502450980392,
|
|
"calibration/coverage@25%": 0.5573927696078431,
|
|
"calibration/coverage@30%": 0.6101761642156862,
|
|
"calibration/coverage@5%": 0.11487438725490196,
|
|
"calibration/ece": 0.11883158877655049,
|
|
"calibration/mean_confidence": 0.5323535677827451,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 869.6,
|
|
"completions/max_terminated_length": 869.6,
|
|
"completions/mean_length": 205.5177734375,
|
|
"completions/mean_terminated_length": 205.68072204589845,
|
|
"completions/min_length": 18.6,
|
|
"completions/min_terminated_length": 97.8,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.0008637637365609407,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 1035723889.0,
|
|
"reward": 0.9390368580818176,
|
|
"reward_std": 0.08419644683599473,
|
|
"rewards/accgated_coverage_0": 0.025884364638477562,
|
|
"rewards/accgated_coverage_1": 0.025884364638477562,
|
|
"rewards/accgated_coverage_10": 0.025869949627667664,
|
|
"rewards/accgated_coverage_15": 0.024993031146004797,
|
|
"rewards/accgated_coverage_20": 0.018615927174687387,
|
|
"rewards/accgated_coverage_25": 0.015474013239145278,
|
|
"rewards/accgated_coverage_5": 0.025884364638477562,
|
|
"rewards/accuracy_reward": 0.54248046875,
|
|
"rewards/brier_reward": 0.806470787525177,
|
|
"rewards/confidence_uniqueness_reward": 0.950226652622223,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0030988804530352352,
|
|
"rewards/frontier_ece_reward": 0.002570468030171469,
|
|
"rewards/frontier_entropy_batch_reward": -0.23961410224437713,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04443902298808098,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.056985524296760556,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004443902382627129,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004443902382627129,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04443902298808098,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.056985524296760556,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004443902382627129,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004443902382627129,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.044393166154623034,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05692854225635528,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004439316829666496,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004439316829666496,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.041806505620479585,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.05369865670800209,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004180650692433119,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004180650692433119,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.023348334059119225,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.030321285501122473,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0023348334711045028,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0023348334711045028,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.01600625291466713,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.020638634264469147,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016006252961233258,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016006252961233258,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04443902298808098,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.056985524296760556,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004443902382627129,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004443902382627129,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.095660400390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12655805945396423,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0478302001953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0478302001953125,
|
|
"signal/advantage_abs_mean": 0.06486985236406326,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06486985236406326,
|
|
"signal/advantage_pre_scale_std": 0.10080467015504838,
|
|
"signal/advantage_std": 0.10080467015504838,
|
|
"signal/brier_reward/centered_abs_mean": 0.10289306491613388,
|
|
"signal/brier_reward/group_std_mean": 0.1338332325220108,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010289306752383709,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010289306752383709,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014014300890266895,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019804118014872075,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014014300424605608,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014014300424605608,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417306780815,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026059220312163235,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004272950720041991,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2574026408838105e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2574026408838105e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00430595139041543,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005667751654982567,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043059513554908333,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043059513554908333,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29276729822158815,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3635729193687439,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02927673012018204,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02927673012018204,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3662020936085089,
|
|
"calibration/batch_distribution_entropy": 0.9741297166815833,
|
|
"calibration/buffer_distribution_entropy": 0.9956483661118323,
|
|
"calibration/confidence_entropy": 0.47563083944507,
|
|
"calibration/coverage@0%": 0.003520220588235294,
|
|
"calibration/coverage@1%": 0.003520220588235294,
|
|
"calibration/coverage@10%": 0.05664522058823529,
|
|
"calibration/coverage@15%": 0.09883272058823529,
|
|
"calibration/coverage@20%": 0.15978400735294118,
|
|
"calibration/coverage@25%": 0.28942555147058824,
|
|
"calibration/coverage@30%": 0.3864047181372549,
|
|
"calibration/coverage@5%": 0.003520220588235294,
|
|
"calibration/ece": 0.13104297770085743,
|
|
"calibration/mean_confidence": 0.4632078044857339,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 685.2,
|
|
"completions/max_terminated_length": 685.2,
|
|
"completions/mean_length": 199.6486328125,
|
|
"completions/mean_terminated_length": 199.74756469726563,
|
|
"completions/min_length": 39.0,
|
|
"completions/min_terminated_length": 97.6,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0008076268131844699,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 1052896771.0,
|
|
"reward": 0.9218993306159973,
|
|
"reward_std": 0.08259947896003723,
|
|
"rewards/accgated_coverage_0": 0.02501909025013447,
|
|
"rewards/accgated_coverage_1": 0.02501909025013447,
|
|
"rewards/accgated_coverage_10": 0.024991927668452263,
|
|
"rewards/accgated_coverage_15": 0.02396598644554615,
|
|
"rewards/accgated_coverage_20": 0.017341459915041924,
|
|
"rewards/accgated_coverage_25": 0.015913099609315397,
|
|
"rewards/accgated_coverage_5": 0.02501909025013447,
|
|
"rewards/accuracy_reward": 0.511328125,
|
|
"rewards/brier_reward": 0.8048971056938171,
|
|
"rewards/confidence_uniqueness_reward": 0.9490490436553956,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.002771574491634965,
|
|
"rewards/frontier_ece_reward": 0.002409780048765242,
|
|
"rewards/frontier_entropy_batch_reward": -0.24799684882164003,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04057878255844116,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05137483105063438,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004057878255844116,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004057878255844116,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04057878255844116,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05137483105063438,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004057878255844116,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004057878255844116,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.040538350492715834,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.05132369995117188,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004053835105150938,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004053835105150938,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03887024968862533,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.04922617822885513,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0038870248012244702,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0038870248012244702,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.021970576792955398,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.02805563621222973,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0021970578003674746,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0021970578003674746,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.015556910447776318,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.0197615884244442,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015556911006569863,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015556911006569863,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04057878255844116,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05137483105063438,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004057878255844116,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004057878255844116,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08858642578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11626765429973603,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044293212890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044293212890625,
|
|
"signal/advantage_abs_mean": 0.06369578093290329,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06369578093290329,
|
|
"signal/advantage_pre_scale_std": 0.1002244383096695,
|
|
"signal/advantage_std": 0.1002244383096695,
|
|
"signal/brier_reward/centered_abs_mean": 0.10709832906723023,
|
|
"signal/brier_reward/group_std_mean": 0.1369374841451645,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010709832608699798,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010709832608699798,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014920119382441043,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02039230614900589,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014920119661837815,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014920119661837815,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00225202739238739,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036956620868295433,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8150342404842377e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8150342404842377e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0042619360610842705,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0056199970655143264,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042619362357072533,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042619362357072533,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2883952736854553,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3603264093399048,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028839527815580367,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028839527815580367,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2565749108783924,
|
|
"calibration/batch_distribution_entropy": 0.9558861737758754,
|
|
"calibration/buffer_distribution_entropy": 0.9958182821581034,
|
|
"calibration/confidence_entropy": 0.46042922907413786,
|
|
"calibration/coverage@0%": 0.017578125,
|
|
"calibration/coverage@1%": 0.017578125,
|
|
"calibration/coverage@10%": 0.111328125,
|
|
"calibration/coverage@15%": 0.1962890625,
|
|
"calibration/coverage@20%": 0.3427734375,
|
|
"calibration/coverage@25%": 0.5517578125,
|
|
"calibration/coverage@30%": 0.640625,
|
|
"calibration/coverage@5%": 0.0517578125,
|
|
"calibration/ece": 0.14136155363744674,
|
|
"calibration/mean_confidence": 0.606045254981874,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 502.5,
|
|
"completions/max_terminated_length": 502.5,
|
|
"completions/mean_length": 202.1562957763672,
|
|
"completions/mean_terminated_length": 202.1562957763672,
|
|
"completions/min_length": 99.0,
|
|
"completions/min_terminated_length": 99.0,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1059728009.0,
|
|
"reward": 0.9332478642463684,
|
|
"reward_std": 0.08353905007243156,
|
|
"rewards/accgated_coverage_0": 0.015768482349812984,
|
|
"rewards/accgated_coverage_1": 0.015768482349812984,
|
|
"rewards/accgated_coverage_10": 0.015732761472463608,
|
|
"rewards/accgated_coverage_15": 0.014848333783447742,
|
|
"rewards/accgated_coverage_20": 0.012655510101467371,
|
|
"rewards/accgated_coverage_25": 0.013384385034441948,
|
|
"rewards/accgated_coverage_5": 0.015768482349812984,
|
|
"rewards/accuracy_reward": 0.536376953125,
|
|
"rewards/brier_reward": 0.7888486683368683,
|
|
"rewards/confidence_uniqueness_reward": 0.9530830383300781,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0028738367836922407,
|
|
"rewards/frontier_ece_reward": 0.002294275094754994,
|
|
"rewards/frontier_entropy_batch_reward": -0.19719929993152618,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.043060190975666046,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.05668780021369457,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004306019051000476,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004306019051000476,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.043060190975666046,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.05668780021369457,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004306019051000476,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004306019051000476,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04302673973143101,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.056644506752491,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004302673973143101,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004302673973143101,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04146258533000946,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.05463242903351784,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004146258695982397,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004146258695982397,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.02249743789434433,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.0300173107534647,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002249743905849755,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002249743905849755,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.015130959451198578,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.01980656199157238,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015130960382521152,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015130960382521152,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.043060190975666046,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.05668780021369457,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004306019051000476,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004306019051000476,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0918731689453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12753162533044815,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6171875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04593658447265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04593658447265625,
|
|
"signal/advantage_abs_mean": 0.06494300253689289,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06494300253689289,
|
|
"signal/advantage_pre_scale_std": 0.10036676377058029,
|
|
"signal/advantage_std": 0.10036676377058029,
|
|
"signal/brier_reward/centered_abs_mean": 0.10487185046076775,
|
|
"signal/brier_reward/group_std_mean": 0.13467831909656525,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010487185325473547,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010487185325473547,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011093974113464355,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.013951313681900501,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001109397446271032,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001109397446271032,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002461986499838531,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004381507635116577,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.077483233937528e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.077483233937528e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00429272442124784,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005649249535053968,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004292724479455501,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004292724479455501,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2588946372270584,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32734930515289307,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02588946372270584,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02588946372270584,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.000582187315441656,
|
|
"train_runtime": 61430.7382,
|
|
"train_samples_per_second": 0.326,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1059728009,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|