Model: hector-gr/RLCR-v4-ks-uniqueness-hotpot-aliases-acceptedanswersfix Source: Original Platform
8474 lines
471 KiB
JSON
8474 lines
471 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.5681354920214096,
|
|
"calibration/batch_distribution_entropy": 0.6450350928927815,
|
|
"calibration/confidence_entropy": 0.346740957452881,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.43446125614086684,
|
|
"calibration/mean_confidence": 0.7908406375397601,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0361328125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1503.4,
|
|
"completions/mean_length": 271.28115234375,
|
|
"completions/mean_terminated_length": 223.8628723144531,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.059467002749443054,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0934,
|
|
"num_tokens": 17621951.0,
|
|
"reward": 0.6730658292770386,
|
|
"reward_std": 0.5045446038246155,
|
|
"rewards/accuracy_reward": 0.27001953125,
|
|
"rewards/brier_reward": 0.4092401027679443,
|
|
"rewards/confidence_uniqueness_reward": 0.48412379026412966,
|
|
"rewards/format_reward": 0.68173828125,
|
|
"rewards/frontier_aurc_reward": 0.3416558563709259,
|
|
"rewards/frontier_coverage_1": 0.3416558563709259,
|
|
"rewards/frontier_coverage_10": 0.3416558563709259,
|
|
"rewards/frontier_coverage_15": 0.3416558563709259,
|
|
"rewards/frontier_coverage_20": 0.3416558563709259,
|
|
"rewards/frontier_coverage_25": 0.3416558563709259,
|
|
"rewards/frontier_coverage_5": 0.3416558563709259,
|
|
"rewards/frontier_ece_reward": 0.3416558563709259,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.279132080078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.31931535005569456,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.25625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1395660400390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1395660400390625,
|
|
"signal/advantage_abs_mean": 0.43447349071502683,
|
|
"signal/advantage_pre_scale_abs_mean": 0.43447349071502683,
|
|
"signal/advantage_pre_scale_std": 0.5123933017253876,
|
|
"signal/advantage_std": 0.5123933017253876,
|
|
"signal/brier_reward/centered_abs_mean": 0.33928354978561404,
|
|
"signal/brier_reward/group_std_mean": 0.38253386616706847,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.042410443723201754,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.042410443723201754,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2978093445301056,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.34845100045204164,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0372261680662632,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0372261680662632,
|
|
"signal/format_reward/centered_abs_mean": 0.404998779296875,
|
|
"signal/format_reward/group_std_mean": 0.4546263098716736,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2024993896484375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.2024993896484375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.31834944486618044,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36653432846069334,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.31834944486618044,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.36653432846069334,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.31834944486618044,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36653432846069334,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.31834944486618044,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36653432846069334,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.31834944486618044,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36653432846069334,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.31834944486618044,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36653432846069334,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.31834944486618044,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.36653432846069334,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00569845512509346,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.31834944486618044,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36653432846069334,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.039793680608272555,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.039793680608272555,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5823592206951076,
|
|
"calibration/batch_distribution_entropy": 0.6377635262826689,
|
|
"calibration/confidence_entropy": 0.34316212043587685,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4711939179056281,
|
|
"calibration/mean_confidence": 0.8033346823525754,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03837890625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1489.4,
|
|
"completions/mean_length": 264.73369140625,
|
|
"completions/mean_terminated_length": 214.02195739746094,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.03480805084109306,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0952,
|
|
"num_tokens": 35433176.0,
|
|
"reward": 0.6785492658615112,
|
|
"reward_std": 0.4799711525440216,
|
|
"rewards/accuracy_reward": 0.25234375,
|
|
"rewards/brier_reward": 0.4085344135761261,
|
|
"rewards/confidence_uniqueness_reward": 0.506452476978302,
|
|
"rewards/format_reward": 0.70908203125,
|
|
"rewards/frontier_aurc_reward": 0.3334519624710083,
|
|
"rewards/frontier_coverage_1": 0.3334519624710083,
|
|
"rewards/frontier_coverage_10": 0.3334519624710083,
|
|
"rewards/frontier_coverage_15": 0.3334519624710083,
|
|
"rewards/frontier_coverage_20": 0.3334519624710083,
|
|
"rewards/frontier_coverage_25": 0.3334519624710083,
|
|
"rewards/frontier_coverage_5": 0.3334519624710083,
|
|
"rewards/frontier_ece_reward": 0.3334519624710083,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2619384765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.30939258337020875,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.246875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13096923828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13096923828125,
|
|
"signal/advantage_abs_mean": 0.4020266532897949,
|
|
"signal/advantage_pre_scale_abs_mean": 0.4020266532897949,
|
|
"signal/advantage_pre_scale_std": 0.48843042850494384,
|
|
"signal/advantage_std": 0.48843042850494384,
|
|
"signal/brier_reward/centered_abs_mean": 0.3226713418960571,
|
|
"signal/brier_reward/group_std_mean": 0.3693849265575409,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04033391773700714,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.04033391773700714,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2817148804664612,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3387665629386902,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03521436005830765,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03521436005830765,
|
|
"signal/format_reward/centered_abs_mean": 0.378582763671875,
|
|
"signal/format_reward/group_std_mean": 0.43834707140922546,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1892913818359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1892913818359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.30297967195510866,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3533449411392212,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.30297967195510866,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3533449411392212,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.30297967195510866,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3533449411392212,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.30297967195510866,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3533449411392212,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.30297967195510866,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3533449411392212,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.30297967195510866,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3533449411392212,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.30297967195510866,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3533449411392212,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005423336289823055,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.30297967195510866,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3533449411392212,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03787245899438858,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03787245899438858,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.485502347974659,
|
|
"calibration/batch_distribution_entropy": 0.6338946369559008,
|
|
"calibration/buffer_distribution_entropy": 0.6565034331851883,
|
|
"calibration/confidence_entropy": 0.3398889343388115,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.38130162024695624,
|
|
"calibration/mean_confidence": 0.8044045301334914,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02001953125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1488.4,
|
|
"completions/mean_length": 213.66611328125,
|
|
"completions/mean_terminated_length": 186.77418518066406,
|
|
"completions/min_length": 5.2,
|
|
"completions/min_terminated_length": 5.2,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.09747687727212906,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0577,
|
|
"num_tokens": 52669853.0,
|
|
"reward": 0.8086728811264038,
|
|
"reward_std": 0.3855405569076538,
|
|
"rewards/accuracy_reward": 0.32490234375,
|
|
"rewards/brier_reward": 0.505929458141327,
|
|
"rewards/confidence_uniqueness_reward": 0.6144041776657104,
|
|
"rewards/format_reward": 0.84814453125,
|
|
"rewards/frontier_aurc_reward": 0.3234916229732335,
|
|
"rewards/frontier_coverage_1": 0.33721864223480225,
|
|
"rewards/frontier_coverage_10": 0.33721864223480225,
|
|
"rewards/frontier_coverage_15": 0.33721864223480225,
|
|
"rewards/frontier_coverage_20": 0.33721864223480225,
|
|
"rewards/frontier_coverage_25": 0.33721864223480225,
|
|
"rewards/frontier_coverage_5": 0.33721864223480225,
|
|
"rewards/frontier_ece_reward": 0.32079982459545137,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.239251708984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.2890691041946411,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.28125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1196258544921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1196258544921875,
|
|
"signal/advantage_abs_mean": 0.3007605969905853,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3007605969905853,
|
|
"signal/advantage_pre_scale_std": 0.3971730887889862,
|
|
"signal/advantage_std": 0.3971730887889862,
|
|
"signal/brier_reward/centered_abs_mean": 0.28599911630153657,
|
|
"signal/brier_reward/group_std_mean": 0.34063884019851687,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03574988953769207,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03574988953769207,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.21475785672664643,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2769153594970703,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026844732090830804,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.026844732090830804,
|
|
"signal/format_reward/centered_abs_mean": 0.224554443359375,
|
|
"signal/format_reward/group_std_mean": 0.31889126896858216,
|
|
"signal/format_reward/group_zero_std_frac": 0.065625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1122772216796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1122772216796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.23223379356786608,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.27599835190922023,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004156984848668799,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004156984848668799,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.25027269423007964,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3035570979118347,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.25027269423007964,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3035570979118347,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.25027269423007964,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3035570979118347,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.25027269423007964,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3035570979118347,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.25027269423007964,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3035570979118347,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.25027269423007964,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3035570979118347,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004479881143197417,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.25118278712034225,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.2993951976299286,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03139784839004278,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03139784839004278,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4613156372239649,
|
|
"calibration/batch_distribution_entropy": 0.6885188701822018,
|
|
"calibration/buffer_distribution_entropy": 0.6525163906375744,
|
|
"calibration/confidence_entropy": 0.3612115527122598,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.33557730079398584,
|
|
"calibration/mean_confidence": 0.7816876164808539,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01103515625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1425.8,
|
|
"completions/mean_length": 162.32021484375,
|
|
"completions/mean_terminated_length": 146.99488830566406,
|
|
"completions/min_length": 1.8,
|
|
"completions/min_terminated_length": 1.8,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.028669551014900208,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0214,
|
|
"num_tokens": 69250412.0,
|
|
"reward": 0.8064048051834106,
|
|
"reward_std": 0.26577826142311095,
|
|
"rewards/accuracy_reward": 0.37060546875,
|
|
"rewards/brier_reward": 0.5703646183013916,
|
|
"rewards/confidence_uniqueness_reward": 0.6865696787834168,
|
|
"rewards/format_reward": 0.92236328125,
|
|
"rewards/frontier_aurc_reward": -0.006006188318133354,
|
|
"rewards/frontier_coverage_1": 0.05366070494055748,
|
|
"rewards/frontier_coverage_10": 0.05366070494055748,
|
|
"rewards/frontier_coverage_15": 0.05366070494055748,
|
|
"rewards/frontier_coverage_20": 0.05366070494055748,
|
|
"rewards/frontier_coverage_25": 0.05366070494055748,
|
|
"rewards/frontier_coverage_5": 0.05366070494055748,
|
|
"rewards/frontier_ece_reward": -0.022815992310643195,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.232647705078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2829224646091461,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.290625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1163238525390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1163238525390625,
|
|
"signal/advantage_abs_mean": 0.20600511133670807,
|
|
"signal/advantage_pre_scale_abs_mean": 0.20600511133670807,
|
|
"signal/advantage_pre_scale_std": 0.2877360999584198,
|
|
"signal/advantage_std": 0.2877360999584198,
|
|
"signal/brier_reward/centered_abs_mean": 0.2589739263057709,
|
|
"signal/brier_reward/group_std_mean": 0.31490403413772583,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03237174078822136,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03237174078822136,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.16161151528358458,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.20550169944763183,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020201439410448073,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020201439410448073,
|
|
"signal/format_reward/centered_abs_mean": 0.111956787109375,
|
|
"signal/format_reward/group_std_mean": 0.17785735428333282,
|
|
"signal/format_reward/group_zero_std_frac": 0.3625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0559783935546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0559783935546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005545902531594038,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0080027237534523,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.927165228873491e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.927165228873491e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.09711904674768448,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15192094445228577,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09711904674768448,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15192094445228577,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09711904674768448,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15192094445228577,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09711904674768448,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15192094445228577,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09711904674768448,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15192094445228577,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.09711904674768448,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15192094445228577,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017384308390319347,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1027738630771637,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.12494452595710755,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012846732884645462,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012846732884645462,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5838333717990067,
|
|
"calibration/batch_distribution_entropy": 0.7631321019020263,
|
|
"calibration/buffer_distribution_entropy": 0.6752091827121237,
|
|
"calibration/confidence_entropy": 0.427675346192575,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3880188298739684,
|
|
"calibration/mean_confidence": 0.743169744444465,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0072265625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1216.2,
|
|
"completions/mean_length": 133.6181640625,
|
|
"completions/mean_terminated_length": 123.41706695556641,
|
|
"completions/min_length": 1.6,
|
|
"completions/min_terminated_length": 1.6,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.10266012698411942,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0038,
|
|
"num_tokens": 85551814.0,
|
|
"reward": 0.86251140832901,
|
|
"reward_std": 0.2213844656944275,
|
|
"rewards/accuracy_reward": 0.41123046875,
|
|
"rewards/brier_reward": 0.6216847538948059,
|
|
"rewards/confidence_uniqueness_reward": 0.7656373262405396,
|
|
"rewards/format_reward": 0.96044921875,
|
|
"rewards/frontier_aurc_reward": -0.005164883844554424,
|
|
"rewards/frontier_coverage_1": 0.04586975798010826,
|
|
"rewards/frontier_coverage_10": 0.04586975798010826,
|
|
"rewards/frontier_coverage_15": 0.04586975798010826,
|
|
"rewards/frontier_coverage_20": 0.04586975798010826,
|
|
"rewards/frontier_coverage_25": 0.04586975798010826,
|
|
"rewards/frontier_coverage_5": 0.04586975798010826,
|
|
"rewards/frontier_ece_reward": -0.01262117656879127,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.220159912109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.27431103587150574,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.284375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1100799560546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1100799560546875,
|
|
"signal/advantage_abs_mean": 0.1713700234889984,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1713700234889984,
|
|
"signal/advantage_pre_scale_std": 0.2442230075597763,
|
|
"signal/advantage_std": 0.2442230075597763,
|
|
"signal/brier_reward/centered_abs_mean": 0.23227280676364898,
|
|
"signal/brier_reward/group_std_mean": 0.28580942153930666,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029034100845456122,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.029034100845456122,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11165157109498977,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.14749074429273606,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013956446386873721,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013956446386873721,
|
|
"signal/format_reward/centered_abs_mean": 0.058660888671875,
|
|
"signal/format_reward/group_std_mean": 0.0945195160806179,
|
|
"signal/format_reward/group_zero_std_frac": 0.65,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0293304443359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0293304443359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004352754168212414,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006485749594867229,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.79142945248168e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.79142945248168e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11262711882591248,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.171766459941864,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11262711882591248,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.171766459941864,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11262711882591248,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.171766459941864,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11262711882591248,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.171766459941864,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11262711882591248,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.171766459941864,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11262711882591248,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.171766459941864,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020160253159701826,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.09184739738702774,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.11263370960950851,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011480924673378468,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011480924673378468,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5599529783433583,
|
|
"calibration/batch_distribution_entropy": 0.8180527560081053,
|
|
"calibration/buffer_distribution_entropy": 0.7099110970064041,
|
|
"calibration/confidence_entropy": 0.5050432578890813,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.2930707046300085,
|
|
"calibration/mean_confidence": 0.6695370928113038,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0107421875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1365.6,
|
|
"completions/mean_length": 139.62607421875,
|
|
"completions/mean_terminated_length": 124.46354522705079,
|
|
"completions/min_length": 1.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.3033556342124939,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0144,
|
|
"num_tokens": 102026193.0,
|
|
"reward": 0.8633492946624756,
|
|
"reward_std": 0.22220987677574158,
|
|
"rewards/accuracy_reward": 0.414453125,
|
|
"rewards/brier_reward": 0.6465227484703064,
|
|
"rewards/confidence_uniqueness_reward": 0.7718554854393005,
|
|
"rewards/format_reward": 0.9494140625,
|
|
"rewards/frontier_aurc_reward": -0.004374950844794512,
|
|
"rewards/frontier_coverage_1": 0.04679640345275402,
|
|
"rewards/frontier_coverage_10": 0.04679640345275402,
|
|
"rewards/frontier_coverage_15": 0.04679640345275402,
|
|
"rewards/frontier_coverage_20": 0.04679640345275402,
|
|
"rewards/frontier_coverage_25": 0.04679640345275402,
|
|
"rewards/frontier_coverage_5": 0.04679640345275402,
|
|
"rewards/frontier_ece_reward": -0.006633454142138362,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2164794921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2656884342432022,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10823974609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10823974609375,
|
|
"signal/advantage_abs_mean": 0.1734051823616028,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1734051823616028,
|
|
"signal/advantage_pre_scale_std": 0.25319576263427734,
|
|
"signal/advantage_std": 0.25319576263427734,
|
|
"signal/brier_reward/centered_abs_mean": 0.22392457127571105,
|
|
"signal/brier_reward/group_std_mean": 0.27409825921058656,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027990571409463882,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.027990571409463882,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1212245300412178,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1597886711359024,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015153066255152225,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015153066255152225,
|
|
"signal/format_reward/centered_abs_mean": 0.076416015625,
|
|
"signal/format_reward/group_std_mean": 0.11220613121986389,
|
|
"signal/format_reward/group_zero_std_frac": 0.6375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0382080078125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0382080078125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030647643376141786,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004675971809774637,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4859279043739664e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4859279043739664e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13435963690280914,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19624820053577424,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13435963690280914,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19624820053577424,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13435963690280914,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19624820053577424,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13435963690280914,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19624820053577424,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13435963690280914,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.19624820053577424,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13435963690280914,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19624820053577424,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024050374049693346,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08245499283075333,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10052948445081711,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010306874103844166,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010306874103844166,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.43093254462035047,
|
|
"calibration/batch_distribution_entropy": 0.8765791511807105,
|
|
"calibration/buffer_distribution_entropy": 0.7493890212367624,
|
|
"calibration/confidence_entropy": 0.5332302085205971,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.12442684122316035,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.18847028436719626,
|
|
"calibration/mean_confidence": 0.6052631008223052,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009765625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 974.0,
|
|
"completions/mean_length": 140.3001953125,
|
|
"completions/mean_terminated_length": 126.54303894042968,
|
|
"completions/min_length": 1.0,
|
|
"completions/min_terminated_length": 1.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.0450531542301178,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0177,
|
|
"num_tokens": 118572339.0,
|
|
"reward": 0.9136639595031738,
|
|
"reward_std": 0.17941873669624328,
|
|
"rewards/accuracy_reward": 0.4568359375,
|
|
"rewards/brier_reward": 0.7062686562538147,
|
|
"rewards/confidence_uniqueness_reward": 0.8257040023803711,
|
|
"rewards/format_reward": 0.97333984375,
|
|
"rewards/frontier_aurc_reward": -0.0037020944990217687,
|
|
"rewards/frontier_coverage_1": 0.05813024044036865,
|
|
"rewards/frontier_coverage_10": 0.05813024044036865,
|
|
"rewards/frontier_coverage_15": 0.05813024044036865,
|
|
"rewards/frontier_coverage_20": 0.05813024044036865,
|
|
"rewards/frontier_coverage_25": 0.05813024044036865,
|
|
"rewards/frontier_coverage_5": 0.05813024044036865,
|
|
"rewards/frontier_ece_reward": 0.007220498844981193,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19686279296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2502582728862762,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.325,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.098431396484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.098431396484375,
|
|
"signal/advantage_abs_mean": 0.13896718621253967,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13896718621253967,
|
|
"signal/advantage_pre_scale_std": 0.20438657999038695,
|
|
"signal/advantage_std": 0.20438657999038695,
|
|
"signal/brier_reward/centered_abs_mean": 0.1961173176765442,
|
|
"signal/brier_reward/group_std_mean": 0.24390378594398499,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024514664709568024,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024514664709568024,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09459523856639862,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.12374730557203292,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011824404820799828,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011824404820799828,
|
|
"signal/format_reward/centered_abs_mean": 0.039300537109375,
|
|
"signal/format_reward/group_std_mean": 0.0625513531267643,
|
|
"signal/format_reward/group_zero_std_frac": 0.778125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0196502685546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0196502685546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022663983050733804,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036506312899291515,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.056852849316783e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.056852849316783e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18296231627464293,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24793100357055664,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18296231627464293,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24793100357055664,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18296231627464293,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24793100357055664,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18296231627464293,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24793100357055664,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18296231627464293,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24793100357055664,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18296231627464293,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24793100357055664,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032750254031270742,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06804275140166283,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08398929536342621,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008505343925207853,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008505343925207853,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4637473590255504,
|
|
"calibration/batch_distribution_entropy": 0.9011286403654379,
|
|
"calibration/buffer_distribution_entropy": 0.7961353907846576,
|
|
"calibration/confidence_entropy": 0.5532398388702255,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.01019607843137255,
|
|
"calibration/coverage@20%": 0.02392156862745098,
|
|
"calibration/coverage@25%": 0.03686274509803922,
|
|
"calibration/coverage@30%": 0.054509803921568636,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.13673163536509028,
|
|
"calibration/mean_confidence": 0.485751228767752,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00703125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 953.2,
|
|
"completions/mean_length": 145.0564453125,
|
|
"completions/mean_terminated_length": 135.19951477050782,
|
|
"completions/min_length": 31.6,
|
|
"completions/min_terminated_length": 31.6,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.017669327557086945,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0129,
|
|
"num_tokens": 134974389.0,
|
|
"reward": 0.923030960559845,
|
|
"reward_std": 0.1417643427848816,
|
|
"rewards/accuracy_reward": 0.4427734375,
|
|
"rewards/brier_reward": 0.7313620209693908,
|
|
"rewards/confidence_uniqueness_reward": 0.8564581751823426,
|
|
"rewards/format_reward": 0.9859375,
|
|
"rewards/frontier_aurc_reward": -0.003509230772033334,
|
|
"rewards/frontier_coverage_1": 0.08638581186532975,
|
|
"rewards/frontier_coverage_10": 0.08638581186532975,
|
|
"rewards/frontier_coverage_15": 0.08638581186532975,
|
|
"rewards/frontier_coverage_20": 0.08638581186532975,
|
|
"rewards/frontier_coverage_25": 0.08638581186532975,
|
|
"rewards/frontier_coverage_5": 0.08638581186532975,
|
|
"rewards/frontier_ece_reward": 0.007863593101501466,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16705322265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.21422863900661468,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.415625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.083526611328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.083526611328125,
|
|
"signal/advantage_abs_mean": 0.11018433421850204,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11018433421850204,
|
|
"signal/advantage_pre_scale_std": 0.16467654705047607,
|
|
"signal/advantage_std": 0.16467654705047607,
|
|
"signal/brier_reward/centered_abs_mean": 0.1866983711719513,
|
|
"signal/brier_reward/group_std_mean": 0.23212920725345612,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02333729639649391,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02333729639649391,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0719639778137207,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09171251058578492,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008995497226715088,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008995497226715088,
|
|
"signal/format_reward/centered_abs_mean": 0.01739501953125,
|
|
"signal/format_reward/group_std_mean": 0.028746084496378898,
|
|
"signal/format_reward/group_zero_std_frac": 0.89375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008697509765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008697509765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017647896660491825,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002844266314059496,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1589733771397734e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1589733771397734e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22426398992538452,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2852416396141052,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22426398992538452,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2852416396141052,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22426398992538452,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2852416396141052,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22426398992538452,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2852416396141052,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22426398992538452,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2852416396141052,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22426398992538452,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2852416396141052,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004014325235038996,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05270521864295006,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0674271434545517,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006588152330368757,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006588152330368757,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27175397742115304,
|
|
"calibration/batch_distribution_entropy": 0.9232081653181499,
|
|
"calibration/buffer_distribution_entropy": 0.8419158033678219,
|
|
"calibration/confidence_entropy": 0.5273831934401841,
|
|
"calibration/coverage@0%": 0.012164204650829395,
|
|
"calibration/coverage@1%": 0.012164204650829395,
|
|
"calibration/coverage@10%": 0.07876753597900463,
|
|
"calibration/coverage@15%": 0.19833483371483523,
|
|
"calibration/coverage@20%": 0.3504713065890895,
|
|
"calibration/coverage@25%": 0.46684294476954935,
|
|
"calibration/coverage@30%": 0.606419362713756,
|
|
"calibration/coverage@5%": 0.035723291128458054,
|
|
"calibration/ece": 0.19987627269719255,
|
|
"calibration/mean_confidence": 0.44355403988022324,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.003515625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 870.0,
|
|
"completions/mean_length": 143.75546875,
|
|
"completions/mean_terminated_length": 138.84595947265626,
|
|
"completions/min_length": 42.2,
|
|
"completions/min_terminated_length": 42.2,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.054179396480321884,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0089,
|
|
"num_tokens": 151396877.0,
|
|
"reward": 0.9702192068099975,
|
|
"reward_std": 0.13579329252243041,
|
|
"rewards/accuracy_reward": 0.54228515625,
|
|
"rewards/brier_reward": 0.737048614025116,
|
|
"rewards/confidence_uniqueness_reward": 0.863895833492279,
|
|
"rewards/format_reward": 0.9849609375,
|
|
"rewards/frontier_aurc_reward": -0.0027967089787125587,
|
|
"rewards/frontier_coverage_1": 0.03856944553554058,
|
|
"rewards/frontier_coverage_10": 0.03856944553554058,
|
|
"rewards/frontier_coverage_15": 0.03856944553554058,
|
|
"rewards/frontier_coverage_20": 0.03856944553554058,
|
|
"rewards/frontier_coverage_25": 0.03856944553554058,
|
|
"rewards/frontier_coverage_5": 0.03856944553554058,
|
|
"rewards/frontier_ece_reward": 0.01908651553094387,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.164288330078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.21441585719585418,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.403125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0821441650390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0821441650390625,
|
|
"signal/advantage_abs_mean": 0.10403890758752823,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10403890758752823,
|
|
"signal/advantage_pre_scale_std": 0.16093845069408416,
|
|
"signal/advantage_std": 0.16093845069408416,
|
|
"signal/brier_reward/centered_abs_mean": 0.18338664174079894,
|
|
"signal/brier_reward/group_std_mean": 0.22795365154743194,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022923330217599867,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022923330217599867,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06989959329366684,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09106495976448059,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008737449161708355,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008737449161708355,
|
|
"signal/format_reward/centered_abs_mean": 0.0210205078125,
|
|
"signal/format_reward/group_std_mean": 0.03324367478489876,
|
|
"signal/format_reward/group_zero_std_frac": 0.88125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01051025390625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01051025390625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014914550818502903,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024216063786298035,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6697046632762066e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6697046632762066e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2453473687171936,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3083998620510101,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2453473687171936,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3083998620510101,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2453473687171936,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3083998620510101,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2453473687171936,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3083998620510101,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2453473687171936,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3083998620510101,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2453473687171936,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3083998620510101,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004391717724502087,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.042443787306547166,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05635495781898499,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005305473413318396,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005305473413318396,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.403125271003182,
|
|
"calibration/batch_distribution_entropy": 0.9391103501505299,
|
|
"calibration/buffer_distribution_entropy": 0.875463691913424,
|
|
"calibration/confidence_entropy": 0.5222249702517436,
|
|
"calibration/coverage@0%": 0.002834008097165992,
|
|
"calibration/coverage@1%": 0.002834008097165992,
|
|
"calibration/coverage@10%": 0.002834008097165992,
|
|
"calibration/coverage@15%": 0.023523663269579782,
|
|
"calibration/coverage@20%": 0.04461407067364151,
|
|
"calibration/coverage@25%": 0.0984996427720886,
|
|
"calibration/coverage@30%": 0.19978814939292627,
|
|
"calibration/coverage@5%": 0.002834008097165992,
|
|
"calibration/ece": 0.10212839080221536,
|
|
"calibration/mean_confidence": 0.4431201023375003,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0041015625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 918.6,
|
|
"completions/mean_length": 149.158203125,
|
|
"completions/mean_terminated_length": 143.44497680664062,
|
|
"completions/min_length": 50.0,
|
|
"completions/min_terminated_length": 50.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.07966148853302002,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0104,
|
|
"num_tokens": 167945185.0,
|
|
"reward": 0.9424496412277221,
|
|
"reward_std": 0.14887651801109314,
|
|
"rewards/accuracy_reward": 0.48271484375,
|
|
"rewards/brier_reward": 0.7433403611183167,
|
|
"rewards/confidence_uniqueness_reward": 0.8616537690162659,
|
|
"rewards/format_reward": 0.978125,
|
|
"rewards/frontier_aurc_reward": -0.0028730600606650114,
|
|
"rewards/frontier_coverage_1": 0.08680228143930435,
|
|
"rewards/frontier_coverage_10": 0.08680228143930435,
|
|
"rewards/frontier_coverage_15": 0.08680228143930435,
|
|
"rewards/frontier_coverage_20": 0.08680228143930435,
|
|
"rewards/frontier_coverage_25": 0.08680228143930435,
|
|
"rewards/frontier_coverage_5": 0.08680228143930435,
|
|
"rewards/frontier_ece_reward": 0.01707436852157116,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.175335693359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.21820703744888306,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0876678466796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0876678466796875,
|
|
"signal/advantage_abs_mean": 0.11476020514965057,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11476020514965057,
|
|
"signal/advantage_pre_scale_std": 0.17932912409305574,
|
|
"signal/advantage_std": 0.17932912409305574,
|
|
"signal/brier_reward/centered_abs_mean": 0.18417735695838927,
|
|
"signal/brier_reward/group_std_mean": 0.2291133314371109,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02302216961979866,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02302216961979866,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07359530031681061,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10155714750289917,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009199412539601326,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009199412539601326,
|
|
"signal/format_reward/centered_abs_mean": 0.03203125,
|
|
"signal/format_reward/group_std_mean": 0.05196922719478607,
|
|
"signal/format_reward/group_zero_std_frac": 0.809375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016015625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016015625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001706640375778079,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002641899697482586,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0548862559953706e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0548862559953706e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.24131617248058318,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.30335493087768556,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.24131617248058318,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30335493087768556,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24131617248058318,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30335493087768556,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.24131617248058318,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.30335493087768556,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.24131617248058318,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.30335493087768556,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.24131617248058318,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.30335493087768556,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004319559410214424,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.040847336500883104,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05442367494106293,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005105917062610388,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005105917062610388,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.6254440846908731,
|
|
"eval_calibration/batch_distribution_entropy": 0.8821632107470848,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8901940690847239,
|
|
"eval_calibration/confidence_entropy": 0.520183064421665,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0,
|
|
"eval_calibration/coverage@15%": 0.0,
|
|
"eval_calibration/coverage@20%": 0.041666666666666664,
|
|
"eval_calibration/coverage@25%": 0.041666666666666664,
|
|
"eval_calibration/coverage@30%": 0.058333333333333334,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.28160651881720433,
|
|
"eval_calibration/mean_confidence": 0.46711323924731185,
|
|
"eval_completions/clipped_ratio": 0.004108297413793094,
|
|
"eval_completions/max_length": 939.5,
|
|
"eval_completions/max_terminated_length": 341.0,
|
|
"eval_completions/mean_length": 156.55832290649414,
|
|
"eval_completions/mean_terminated_length": 150.87627792358398,
|
|
"eval_completions/min_length": 69.0,
|
|
"eval_completions/min_terminated_length": 69.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 167945185.0,
|
|
"eval_reward": 0.8764741569757462,
|
|
"eval_reward_std": 0.2708371505141258,
|
|
"eval_rewards/accuracy_reward": 0.353515625,
|
|
"eval_rewards/brier_reward": 0.752171978354454,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.7996502369642258,
|
|
"eval_rewards/format_reward": 0.96875,
|
|
"eval_rewards/frontier_aurc_reward": -0.003503879823256284,
|
|
"eval_rewards/frontier_coverage_1": 0.18414541706442833,
|
|
"eval_rewards/frontier_coverage_10": 0.18414541706442833,
|
|
"eval_rewards/frontier_coverage_15": 0.18414541706442833,
|
|
"eval_rewards/frontier_coverage_20": 0.18414541706442833,
|
|
"eval_rewards/frontier_coverage_25": 0.18414541706442833,
|
|
"eval_rewards/frontier_coverage_5": 0.18414541706442833,
|
|
"eval_rewards/frontier_ece_reward": 0.01319264032645151,
|
|
"eval_runtime": 37.2237,
|
|
"eval_samples_per_second": 13.432,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4461669921875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.47892439365386963,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22308349609375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22308349609375,
|
|
"eval_signal/advantage_abs_mean": 0.21810520812869072,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21810520812869072,
|
|
"eval_signal/advantage_pre_scale_std": 0.2705560587346554,
|
|
"eval_signal/advantage_std": 0.2705560587346554,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21390501782298088,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2747611552476883,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02673812722787261,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02673812722787261,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.1026211753487587,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.15684263966977596,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012827646918594837,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012827646918594837,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.058837890625,
|
|
"eval_signal/format_reward/group_std_mean": 0.13523541949689388,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.375,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0294189453125,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0294189453125,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002764371281955391,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0045166065683588386,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.948224341205787e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.948224341205787e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3214282989501953,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.394027441740036,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3214282989501953,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.394027441740036,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3214282989501953,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.394027441740036,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3214282989501953,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.394027441740036,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3214282989501953,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.394027441740036,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3214282989501953,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.394027441740036,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005753566394560039,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.049655829556286335,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.07456529140472412,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006206978694535792,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006206978694535792,
|
|
"eval_steps_per_second": 0.107,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"step": 50,
|
|
"train_probe_calibration/aurc": 0.34953725528411633,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.9158039775330977,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.891133958747135,
|
|
"train_probe_calibration/confidence_entropy": 0.5133234399655757,
|
|
"train_probe_calibration/coverage@0%": 0.11164314516129031,
|
|
"train_probe_calibration/coverage@1%": 0.11164314516129031,
|
|
"train_probe_calibration/coverage@10%": 0.11164314516129031,
|
|
"train_probe_calibration/coverage@15%": 0.18220766129032256,
|
|
"train_probe_calibration/coverage@20%": 0.2537802419354839,
|
|
"train_probe_calibration/coverage@25%": 0.2850302419354839,
|
|
"train_probe_calibration/coverage@30%": 0.3631552419354839,
|
|
"train_probe_calibration/coverage@5%": 0.11164314516129031,
|
|
"train_probe_calibration/ece": 0.1722202620967742,
|
|
"train_probe_calibration/mean_confidence": 0.4648311491935484,
|
|
"train_probe_completions/clipped_ratio": 0.008216594827586216,
|
|
"train_probe_completions/max_length": 1455.25,
|
|
"train_probe_completions/max_terminated_length": 755.0,
|
|
"train_probe_completions/mean_length": 164.17631912231445,
|
|
"train_probe_completions/mean_terminated_length": 152.7919807434082,
|
|
"train_probe_completions/min_length": 71.0,
|
|
"train_probe_completions/min_terminated_length": 71.0,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 167945185.0,
|
|
"train_probe_reward": 0.9423353224992752,
|
|
"train_probe_reward_std": 0.27149440348148346,
|
|
"train_probe_rewards/accuracy_reward": 0.4921875,
|
|
"train_probe_rewards/brier_reward": 0.7489275336265564,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.8245857506990433,
|
|
"train_probe_rewards/format_reward": 0.974609375,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0028590288711711764,
|
|
"train_probe_rewards/frontier_coverage_1": 0.08961892500519753,
|
|
"train_probe_rewards/frontier_coverage_10": 0.08961892500519753,
|
|
"train_probe_rewards/frontier_coverage_15": 0.08961892500519753,
|
|
"train_probe_rewards/frontier_coverage_20": 0.08961892500519753,
|
|
"train_probe_rewards/frontier_coverage_25": 0.08961892500519753,
|
|
"train_probe_rewards/frontier_coverage_5": 0.08961892500519753,
|
|
"train_probe_rewards/frontier_ece_reward": 0.02139047277159989,
|
|
"train_probe_runtime": 54.1297,
|
|
"train_probe_samples_per_second": 9.237,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.489990234375,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.5028149038553238,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2449951171875,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2449951171875,
|
|
"train_probe_signal/advantage_abs_mean": 0.23267249390482903,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.23267249390482903,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.27059199661016464,
|
|
"train_probe_signal/advantage_std": 0.27059199661016464,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.2169180065393448,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.2719731330871582,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0271147508174181,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0271147508174181,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.08825866505503654,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.14233380556106567,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011032333131879568,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011032333131879568,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0484619140625,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.1234525553882122,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 0.375,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02423095703125,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.02423095703125,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002652477065566927,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0041865811217576265,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.747933689941419e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.747933689941419e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3308027759194374,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.424383707344532,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3308027759194374,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.424383707344532,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3308027759194374,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.424383707344532,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3308027759194374,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.424383707344532,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3308027759194374,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.424383707344532,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3308027759194374,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.424383707344532,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005921369651332498,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.051902798004448414,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.07279590144753456,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006487849750556052,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006487849750556052,
|
|
"train_probe_steps_per_second": 0.074
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3712372844316617,
|
|
"calibration/batch_distribution_entropy": 0.9609159777516597,
|
|
"calibration/buffer_distribution_entropy": 0.8971900490580224,
|
|
"calibration/confidence_entropy": 0.49239175054637546,
|
|
"calibration/coverage@0%": 0.002510460251046025,
|
|
"calibration/coverage@1%": 0.002510460251046025,
|
|
"calibration/coverage@10%": 0.002510460251046025,
|
|
"calibration/coverage@15%": 0.002510460251046025,
|
|
"calibration/coverage@20%": 0.002510460251046025,
|
|
"calibration/coverage@25%": 0.04713520670135028,
|
|
"calibration/coverage@30%": 0.26879748917877694,
|
|
"calibration/coverage@5%": 0.002510460251046025,
|
|
"calibration/ece": 0.16573751140661525,
|
|
"calibration/mean_confidence": 0.5205409540211041,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006640625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1219.2,
|
|
"completions/mean_length": 160.66025390625,
|
|
"completions/mean_terminated_length": 151.46548461914062,
|
|
"completions/min_length": 46.0,
|
|
"completions/min_terminated_length": 46.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.06872954219579697,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0122,
|
|
"num_tokens": 184827466.0,
|
|
"reward": 0.9402350187301636,
|
|
"reward_std": 0.14623880088329316,
|
|
"rewards/accuracy_reward": 0.48759765625,
|
|
"rewards/brier_reward": 0.7364905476570129,
|
|
"rewards/confidence_uniqueness_reward": 0.8593945026397705,
|
|
"rewards/format_reward": 0.9693359375,
|
|
"rewards/frontier_aurc_reward": -0.0029026484582573174,
|
|
"rewards/frontier_coverage_1": 0.09159794300794602,
|
|
"rewards/frontier_coverage_10": 0.09159794300794602,
|
|
"rewards/frontier_coverage_15": 0.09159794300794602,
|
|
"rewards/frontier_coverage_20": 0.09159794300794602,
|
|
"rewards/frontier_coverage_25": 0.09159794300794602,
|
|
"rewards/frontier_coverage_5": 0.09159794300794602,
|
|
"rewards/frontier_ece_reward": 0.019975333102047445,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.145709228515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.19187160730361938,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.453125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0728546142578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0728546142578125,
|
|
"signal/advantage_abs_mean": 0.1101900115609169,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1101900115609169,
|
|
"signal/advantage_pre_scale_std": 0.17681396007537842,
|
|
"signal/advantage_std": 0.17681396007537842,
|
|
"signal/brier_reward/centered_abs_mean": 0.19099677503108978,
|
|
"signal/brier_reward/group_std_mean": 0.23669040203094482,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023874596878886222,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023874596878886222,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07248903661966324,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09891549348831177,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009061129577457906,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009061129577457906,
|
|
"signal/format_reward/centered_abs_mean": 0.0377197265625,
|
|
"signal/format_reward/group_std_mean": 0.05772824138402939,
|
|
"signal/format_reward/group_zero_std_frac": 0.8,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01885986328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01885986328125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021433203713968397,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032778474967926742,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8365434011211616e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8365434011211616e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22156096398830413,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.282322096824646,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22156096398830413,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.282322096824646,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22156096398830413,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.282322096824646,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22156096398830413,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.282322096824646,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22156096398830413,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.282322096824646,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22156096398830413,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.282322096824646,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003965941350907087,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04264579936861992,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05547093003988266,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00533072492107749,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00533072492107749,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35738801713319707,
|
|
"calibration/batch_distribution_entropy": 0.9222789410563648,
|
|
"calibration/buffer_distribution_entropy": 0.9062810656530844,
|
|
"calibration/confidence_entropy": 0.4314712566952025,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.004733727810650888,
|
|
"calibration/coverage@15%": 0.010794333871256948,
|
|
"calibration/coverage@20%": 0.08998625306317615,
|
|
"calibration/coverage@25%": 0.2524840938888444,
|
|
"calibration/coverage@30%": 0.3501611604120648,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.129347005564316,
|
|
"calibration/mean_confidence": 0.5669145772975883,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0068359375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 990.2,
|
|
"completions/mean_length": 165.06435546875,
|
|
"completions/mean_terminated_length": 155.6231201171875,
|
|
"completions/min_length": 49.4,
|
|
"completions/min_terminated_length": 49.4,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.06620907038450241,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.013,
|
|
"num_tokens": 201332541.0,
|
|
"reward": 0.9339034557342529,
|
|
"reward_std": 0.15482064783573152,
|
|
"rewards/accuracy_reward": 0.4763671875,
|
|
"rewards/brier_reward": 0.7338881492614746,
|
|
"rewards/confidence_uniqueness_reward": 0.8472534418106079,
|
|
"rewards/format_reward": 0.966796875,
|
|
"rewards/frontier_aurc_reward": -0.0033207187429070474,
|
|
"rewards/frontier_coverage_1": 0.11109301298856736,
|
|
"rewards/frontier_coverage_10": 0.11109301298856736,
|
|
"rewards/frontier_coverage_15": 0.11109301298856736,
|
|
"rewards/frontier_coverage_20": 0.11109301298856736,
|
|
"rewards/frontier_coverage_25": 0.11109301298856736,
|
|
"rewards/frontier_coverage_5": 0.11109301298856736,
|
|
"rewards/frontier_ece_reward": 0.022454247623682023,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14830322265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1960592031478882,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.074151611328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.074151611328125,
|
|
"signal/advantage_abs_mean": 0.11740224063396454,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11740224063396454,
|
|
"signal/advantage_pre_scale_std": 0.19125163555145264,
|
|
"signal/advantage_std": 0.19125163555145264,
|
|
"signal/brier_reward/centered_abs_mean": 0.19765847623348237,
|
|
"signal/brier_reward/group_std_mean": 0.24587923288345337,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024707309529185296,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024707309529185296,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08868281245231628,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11674559116363525,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011085351556539535,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011085351556539535,
|
|
"signal/format_reward/centered_abs_mean": 0.04295654296875,
|
|
"signal/format_reward/group_std_mean": 0.06320370435714721,
|
|
"signal/format_reward/group_zero_std_frac": 0.790625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.021478271484375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.021478271484375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003104797238484025,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004736031871289015,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.557586846407503e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.557586846407503e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20398018062114714,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.265421861410141,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20398018062114714,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.265421861410141,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20398018062114714,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.265421861410141,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20398018062114714,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.265421861410141,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20398018062114714,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.265421861410141,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20398018062114714,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.265421861410141,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036512451246380807,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0457615964114666,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05886110737919807,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005720199551433325,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005720199551433325,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29643063095441663,
|
|
"calibration/batch_distribution_entropy": 0.9171947927143869,
|
|
"calibration/buffer_distribution_entropy": 0.9100448664327612,
|
|
"calibration/confidence_entropy": 0.4175287827982107,
|
|
"calibration/coverage@0%": 0.014481409001956946,
|
|
"calibration/coverage@1%": 0.014481409001956946,
|
|
"calibration/coverage@10%": 0.09575864838103554,
|
|
"calibration/coverage@15%": 0.2081677205074029,
|
|
"calibration/coverage@20%": 0.33050935136204285,
|
|
"calibration/coverage@25%": 0.44360224553076566,
|
|
"calibration/coverage@30%": 0.5479605902693373,
|
|
"calibration/coverage@5%": 0.01643835616438356,
|
|
"calibration/ece": 0.1376342119353494,
|
|
"calibration/mean_confidence": 0.5811524191443811,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0072265625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1322.8,
|
|
"completions/mean_length": 168.00712890625,
|
|
"completions/mean_terminated_length": 158.03106689453125,
|
|
"completions/min_length": 36.4,
|
|
"completions/min_terminated_length": 36.4,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.31033796072006226,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0181,
|
|
"num_tokens": 218085158.0,
|
|
"reward": 0.9355008006095886,
|
|
"reward_std": 0.18306846916675568,
|
|
"rewards/accuracy_reward": 0.4966796875,
|
|
"rewards/brier_reward": 0.7259644269943237,
|
|
"rewards/confidence_uniqueness_reward": 0.8441673517227173,
|
|
"rewards/format_reward": 0.9564453125,
|
|
"rewards/frontier_aurc_reward": -0.002846927708014846,
|
|
"rewards/frontier_coverage_1": 0.09027891755104064,
|
|
"rewards/frontier_coverage_10": 0.09027891755104064,
|
|
"rewards/frontier_coverage_15": 0.09027891755104064,
|
|
"rewards/frontier_coverage_20": 0.09027891755104064,
|
|
"rewards/frontier_coverage_25": 0.09027891755104064,
|
|
"rewards/frontier_coverage_5": 0.09027891755104064,
|
|
"rewards/frontier_ece_reward": 0.02421446852385998,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.179248046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.22900831699371338,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0896240234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0896240234375,
|
|
"signal/advantage_abs_mean": 0.1380708247423172,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1380708247423172,
|
|
"signal/advantage_pre_scale_std": 0.22031235992908477,
|
|
"signal/advantage_std": 0.22031235992908477,
|
|
"signal/brier_reward/centered_abs_mean": 0.2126880943775177,
|
|
"signal/brier_reward/group_std_mean": 0.2632372736930847,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026586011797189713,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.026586011797189713,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09807170182466507,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1343176171183586,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012258962728083134,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012258962728083134,
|
|
"signal/format_reward/centered_abs_mean": 0.06129150390625,
|
|
"signal/format_reward/group_std_mean": 0.09367451593279838,
|
|
"signal/format_reward/group_zero_std_frac": 0.68125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.030645751953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.030645751953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029108581598848104,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004493788257241249,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.2104357746429744e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.2104357746429744e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21561312973499297,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28178144097328184,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21561312973499297,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28178144097328184,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21561312973499297,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28178144097328184,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21561312973499297,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28178144097328184,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21561312973499297,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28178144097328184,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21561312973499297,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28178144097328184,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003859474789351225,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.043842590600252154,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.056413907557725906,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005480323825031519,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005480323825031519,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.46571920158011276,
|
|
"calibration/batch_distribution_entropy": 0.9178738567052317,
|
|
"calibration/buffer_distribution_entropy": 0.9163612020518315,
|
|
"calibration/confidence_entropy": 0.3947332396271469,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.027000000000000003,
|
|
"calibration/coverage@20%": 0.04491176470588236,
|
|
"calibration/coverage@25%": 0.17707901232241774,
|
|
"calibration/coverage@30%": 0.25417977729208274,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.19856830881468507,
|
|
"calibration/mean_confidence": 0.40029018840683345,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.065234375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1464.2,
|
|
"completions/mean_length": 242.76435546875,
|
|
"completions/mean_terminated_length": 152.45449523925782,
|
|
"completions/min_length": 36.2,
|
|
"completions/min_terminated_length": 36.2,
|
|
"epoch": 0.224,
|
|
"grad_norm": 2.9691953659057617,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0767,
|
|
"num_tokens": 235724249.0,
|
|
"reward": 0.6787481069564819,
|
|
"reward_std": 0.34110564887523653,
|
|
"rewards/accuracy_reward": 0.33427734375,
|
|
"rewards/brier_reward": 0.5312727630138397,
|
|
"rewards/confidence_uniqueness_reward": 0.6269549608230591,
|
|
"rewards/format_reward": 0.7095703125,
|
|
"rewards/frontier_aurc_reward": -0.0023390050046145916,
|
|
"rewards/frontier_coverage_1": 0.09750215262174607,
|
|
"rewards/frontier_coverage_10": 0.09750215262174607,
|
|
"rewards/frontier_coverage_15": 0.09750215262174607,
|
|
"rewards/frontier_coverage_20": 0.09750215262174607,
|
|
"rewards/frontier_coverage_25": 0.09750215262174607,
|
|
"rewards/frontier_coverage_5": 0.09750215262174607,
|
|
"rewards/frontier_ece_reward": 0.012927726469933986,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.188677978515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.23931180834770202,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0943389892578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0943389892578125,
|
|
"signal/advantage_abs_mean": 0.2798399984836578,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2798399984836578,
|
|
"signal/advantage_pre_scale_std": 0.3601413905620575,
|
|
"signal/advantage_std": 0.3601413905620575,
|
|
"signal/brier_reward/centered_abs_mean": 0.29879134297370913,
|
|
"signal/brier_reward/group_std_mean": 0.3516451418399811,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03734891787171364,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03734891787171364,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2545173615217209,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3105557501316071,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03181467019021511,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03181467019021511,
|
|
"signal/format_reward/centered_abs_mean": 0.27607421875,
|
|
"signal/format_reward/group_std_mean": 0.3350002527236938,
|
|
"signal/format_reward/group_zero_std_frac": 0.159375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.138037109375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.138037109375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002703424310311675,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004504935536533594,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.839129323954694e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.839129323954694e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2409254640340805,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3174335896968842,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2409254640340805,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3174335896968842,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2409254640340805,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3174335896968842,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2409254640340805,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3174335896968842,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2409254640340805,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3174335896968842,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2409254640340805,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3174335896968842,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004312565550208092,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0325216319411993,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.044718362390995026,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004065203992649913,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004065203992649913,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6752260460289098,
|
|
"calibration/batch_distribution_entropy": 0.8585007362093007,
|
|
"calibration/buffer_distribution_entropy": 0.921174766327918,
|
|
"calibration/confidence_entropy": 0.3302784425160673,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3135163470474769,
|
|
"calibration/mean_confidence": 0.4402562737874036,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.762109375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1532.2,
|
|
"completions/mean_length": 1249.8326171875,
|
|
"completions/mean_terminated_length": 387.5689727783203,
|
|
"completions/min_length": 3.6,
|
|
"completions/min_terminated_length": 3.6,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0177,
|
|
"num_tokens": 263774215.0,
|
|
"reward": 0.0329528481233865,
|
|
"reward_std": 0.07450879570096731,
|
|
"rewards/accuracy_reward": 0.01083984375,
|
|
"rewards/brier_reward": 0.02677628120291047,
|
|
"rewards/confidence_uniqueness_reward": 0.03159494288265705,
|
|
"rewards/format_reward": 0.03857421875,
|
|
"rewards/frontier_aurc_reward": -0.0002577310428023338,
|
|
"rewards/frontier_coverage_1": 0.009148352436022833,
|
|
"rewards/frontier_coverage_10": 0.009148352436022833,
|
|
"rewards/frontier_coverage_15": 0.009148352436022833,
|
|
"rewards/frontier_coverage_20": 0.009148352436022833,
|
|
"rewards/frontier_coverage_25": 0.009148352436022833,
|
|
"rewards/frontier_coverage_5": 0.009148352436022833,
|
|
"rewards/frontier_ece_reward": -0.00022805376793257892,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.017510986328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.02862224280834198,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.896875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0087554931640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0087554931640625,
|
|
"signal/advantage_abs_mean": 0.04833462685346603,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04833462685346603,
|
|
"signal/advantage_pre_scale_std": 0.10756354965269566,
|
|
"signal/advantage_std": 0.10756354965269566,
|
|
"signal/brier_reward/centered_abs_mean": 0.04109984996030107,
|
|
"signal/brier_reward/group_std_mean": 0.0659692483022809,
|
|
"signal/brier_reward/group_zero_std_frac": 0.71875,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005137481245037634,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.005137481245037634,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04622841775417328,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07011332884430885,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.7125,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00577855221927166,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00577855221927166,
|
|
"signal/format_reward/centered_abs_mean": 0.056903076171875,
|
|
"signal/format_reward/group_std_mean": 0.0884034713730216,
|
|
"signal/format_reward/group_zero_std_frac": 0.69375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0284515380859375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0284515380859375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0004303819587221369,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0009567889268510043,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.69375,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.70383680901432e-06,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.70383680901432e-06,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.026687611715169625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.04987622057087719,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.7125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.026687611715169625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.04987622057087719,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.7125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.026687611715169625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.04987622057087719,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.7125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.026687611715169625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.04987622057087719,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.7125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.026687611715169625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.04987622057087719,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.7125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.026687611715169625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.04987622057087719,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.7125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00047770824676263146,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00249036728637293,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005515742604620755,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.69375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00031129591079661625,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00031129591079661625,
|
|
"step": 75
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.88017578125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1533.2,
|
|
"completions/mean_length": 1385.08759765625,
|
|
"completions/mean_terminated_length": 275.3055450439453,
|
|
"completions/min_length": 2.2,
|
|
"completions/min_terminated_length": 2.2,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 293012328.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 80
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.96201171875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1481.6,
|
|
"completions/mean_length": 1489.7912109375,
|
|
"completions/mean_terminated_length": 334.21091918945314,
|
|
"completions/min_length": 2.8,
|
|
"completions/min_terminated_length": 2.8,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 323233486.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 85
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.98466796875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1513.6,
|
|
"completions/mean_length": 1520.5732421875,
|
|
"completions/mean_terminated_length": 536.7734497070312,
|
|
"completions/min_length": 9.4,
|
|
"completions/min_terminated_length": 9.4,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 353762332.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 90
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.98818359375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1392.0,
|
|
"completions/mean_length": 1522.99775390625,
|
|
"completions/mean_terminated_length": 435.09649353027345,
|
|
"completions/min_length": 8.4,
|
|
"completions/min_terminated_length": 8.4,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 384287781.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 95
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9904296875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1392.0,
|
|
"completions/mean_length": 1525.1625,
|
|
"completions/mean_terminated_length": 416.81488647460935,
|
|
"completions/min_length": 12.0,
|
|
"completions/min_terminated_length": 12.0,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 414994149.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_completions/clipped_ratio": 0.998046875,
|
|
"eval_completions/max_length": 1536.0,
|
|
"eval_completions/max_terminated_length": 231.5,
|
|
"eval_completions/mean_length": 1534.80859375,
|
|
"eval_completions/mean_terminated_length": 231.5,
|
|
"eval_completions/min_length": 1383.5,
|
|
"eval_completions/min_terminated_length": 231.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 414994149.0,
|
|
"eval_reward": 0.0,
|
|
"eval_reward_std": 0.0,
|
|
"eval_rewards/accuracy_reward": 0.0,
|
|
"eval_rewards/brier_reward": 0.0,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.0,
|
|
"eval_rewards/format_reward": 0.0,
|
|
"eval_rewards/frontier_aurc_reward": 0.0,
|
|
"eval_rewards/frontier_coverage_1": 0.0,
|
|
"eval_rewards/frontier_coverage_10": 0.0,
|
|
"eval_rewards/frontier_coverage_15": 0.0,
|
|
"eval_rewards/frontier_coverage_20": 0.0,
|
|
"eval_rewards/frontier_coverage_25": 0.0,
|
|
"eval_rewards/frontier_coverage_5": 0.0,
|
|
"eval_rewards/frontier_ece_reward": 0.0,
|
|
"eval_runtime": 74.8012,
|
|
"eval_samples_per_second": 6.684,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/advantage_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_std": 0.0,
|
|
"eval_signal/advantage_std": 0.0,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/group_std_mean": 0.0,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.053,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"step": 100,
|
|
"train_probe_completions/clipped_ratio": 0.994140625,
|
|
"train_probe_completions/max_length": 1536.0,
|
|
"train_probe_completions/max_terminated_length": 567.5,
|
|
"train_probe_completions/mean_length": 1531.43359375,
|
|
"train_probe_completions/mean_terminated_length": 567.5,
|
|
"train_probe_completions/min_length": 951.5,
|
|
"train_probe_completions/min_terminated_length": 567.5,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 414994149.0,
|
|
"train_probe_reward": 0.0,
|
|
"train_probe_reward_std": 0.0,
|
|
"train_probe_rewards/accuracy_reward": 0.0,
|
|
"train_probe_rewards/brier_reward": 0.0,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
|
|
"train_probe_rewards/format_reward": 0.0,
|
|
"train_probe_rewards/frontier_aurc_reward": 0.0,
|
|
"train_probe_rewards/frontier_coverage_1": 0.0,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0,
|
|
"train_probe_rewards/frontier_coverage_15": 0.0,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0,
|
|
"train_probe_rewards/frontier_coverage_25": 0.0,
|
|
"train_probe_rewards/frontier_coverage_5": 0.0,
|
|
"train_probe_rewards/frontier_ece_reward": 0.0,
|
|
"train_probe_runtime": 73.546,
|
|
"train_probe_samples_per_second": 6.798,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.0,
|
|
"train_probe_signal/advantage_std": 0.0,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_steps_per_second": 0.054
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99091796875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1429.0,
|
|
"completions/mean_length": 1526.96376953125,
|
|
"completions/mean_terminated_length": 541.3440856933594,
|
|
"completions/min_length": 21.6,
|
|
"completions/min_terminated_length": 21.6,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 445352690.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 105
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.991015625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1341.0,
|
|
"completions/mean_length": 1526.5474609375,
|
|
"completions/mean_terminated_length": 485.99510498046874,
|
|
"completions/min_length": 24.6,
|
|
"completions/min_terminated_length": 24.6,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 476244952.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 110
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99072265625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1438.6,
|
|
"completions/mean_length": 1526.39052734375,
|
|
"completions/mean_terminated_length": 499.4287414550781,
|
|
"completions/min_length": 19.0,
|
|
"completions/min_terminated_length": 19.0,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 506940663.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 115
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9912109375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1377.2,
|
|
"completions/mean_length": 1526.468359375,
|
|
"completions/mean_terminated_length": 455.91710205078124,
|
|
"completions/min_length": 28.0,
|
|
"completions/min_terminated_length": 28.0,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 537428211.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 120
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99150390625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1416.2,
|
|
"completions/mean_length": 1527.345703125,
|
|
"completions/mean_terminated_length": 484.37660522460936,
|
|
"completions/min_length": 32.0,
|
|
"completions/min_terminated_length": 32.0,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 568104679.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 125
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.98994140625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1400.4,
|
|
"completions/mean_length": 1525.31591796875,
|
|
"completions/mean_terminated_length": 484.96845703125,
|
|
"completions/min_length": 15.0,
|
|
"completions/min_terminated_length": 15.0,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 598605098.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 130
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.990234375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1392.6,
|
|
"completions/mean_length": 1525.57548828125,
|
|
"completions/mean_terminated_length": 465.9883239746094,
|
|
"completions/min_length": 21.0,
|
|
"completions/min_terminated_length": 21.0,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 629241327.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 135
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99169921875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1312.0,
|
|
"completions/mean_length": 1527.14140625,
|
|
"completions/mean_terminated_length": 479.5002502441406,
|
|
"completions/min_length": 21.4,
|
|
"completions/min_terminated_length": 21.4,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 659832055.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 140
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9908203125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1349.6,
|
|
"completions/mean_length": 1526.08798828125,
|
|
"completions/mean_terminated_length": 470.441455078125,
|
|
"completions/min_length": 30.8,
|
|
"completions/min_terminated_length": 30.8,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 690630012.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 145
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.992578125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1409.6,
|
|
"completions/mean_length": 1527.8091796875,
|
|
"completions/mean_terminated_length": 447.0835357666016,
|
|
"completions/min_length": 19.8,
|
|
"completions/min_terminated_length": 19.8,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 721322810.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_completions/clipped_ratio": 0.9876751077586207,
|
|
"eval_completions/max_length": 1536.0,
|
|
"eval_completions/max_terminated_length": 476.0,
|
|
"eval_completions/mean_length": 1521.2329711914062,
|
|
"eval_completions/mean_terminated_length": 361.83333587646484,
|
|
"eval_completions/min_length": 303.0,
|
|
"eval_completions/min_terminated_length": 303.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 721322810.0,
|
|
"eval_reward": 0.0,
|
|
"eval_reward_std": 0.0,
|
|
"eval_rewards/accuracy_reward": 0.0,
|
|
"eval_rewards/brier_reward": 0.0,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.0,
|
|
"eval_rewards/format_reward": 0.0,
|
|
"eval_rewards/frontier_aurc_reward": 0.0,
|
|
"eval_rewards/frontier_coverage_1": 0.0,
|
|
"eval_rewards/frontier_coverage_10": 0.0,
|
|
"eval_rewards/frontier_coverage_15": 0.0,
|
|
"eval_rewards/frontier_coverage_20": 0.0,
|
|
"eval_rewards/frontier_coverage_25": 0.0,
|
|
"eval_rewards/frontier_coverage_5": 0.0,
|
|
"eval_rewards/frontier_ece_reward": 0.0,
|
|
"eval_runtime": 74.8404,
|
|
"eval_samples_per_second": 6.681,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/advantage_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_std": 0.0,
|
|
"eval_signal/advantage_std": 0.0,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/group_std_mean": 0.0,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.053,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"step": 150,
|
|
"train_probe_completions/clipped_ratio": 0.994140625,
|
|
"train_probe_completions/max_length": 1536.0,
|
|
"train_probe_completions/max_terminated_length": 483.25,
|
|
"train_probe_completions/mean_length": 1532.205078125,
|
|
"train_probe_completions/mean_terminated_length": 443.875,
|
|
"train_probe_completions/min_length": 1172.5,
|
|
"train_probe_completions/min_terminated_length": 404.5,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 721322810.0,
|
|
"train_probe_reward": 0.0,
|
|
"train_probe_reward_std": 0.0,
|
|
"train_probe_rewards/accuracy_reward": 0.0,
|
|
"train_probe_rewards/brier_reward": 0.0,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
|
|
"train_probe_rewards/format_reward": 0.0,
|
|
"train_probe_rewards/frontier_aurc_reward": 0.0,
|
|
"train_probe_rewards/frontier_coverage_1": 0.0,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0,
|
|
"train_probe_rewards/frontier_coverage_15": 0.0,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0,
|
|
"train_probe_rewards/frontier_coverage_25": 0.0,
|
|
"train_probe_rewards/frontier_coverage_5": 0.0,
|
|
"train_probe_rewards/frontier_ece_reward": 0.0,
|
|
"train_probe_runtime": 72.8578,
|
|
"train_probe_samples_per_second": 6.863,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.0,
|
|
"train_probe_signal/advantage_std": 0.0,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_steps_per_second": 0.055
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99287109375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1102.8,
|
|
"completions/mean_length": 1527.67587890625,
|
|
"completions/mean_terminated_length": 357.08160400390625,
|
|
"completions/min_length": 28.2,
|
|
"completions/min_terminated_length": 28.2,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 752274051.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 155
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.992578125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1313.8,
|
|
"completions/mean_length": 1528.16044921875,
|
|
"completions/mean_terminated_length": 473.29002075195314,
|
|
"completions/min_length": 33.6,
|
|
"completions/min_terminated_length": 33.6,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 783068078.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 160
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.990625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1420.2,
|
|
"completions/mean_length": 1525.24150390625,
|
|
"completions/mean_terminated_length": 396.59959106445314,
|
|
"completions/min_length": 22.6,
|
|
"completions/min_terminated_length": 22.6,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 813716087.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 165
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9921875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1404.4,
|
|
"completions/mean_length": 1527.6833984375,
|
|
"completions/mean_terminated_length": 487.17767944335935,
|
|
"completions/min_length": 21.4,
|
|
"completions/min_terminated_length": 21.4,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 844523149.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 170
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9919921875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1442.6,
|
|
"completions/mean_length": 1527.085546875,
|
|
"completions/mean_terminated_length": 423.56866455078125,
|
|
"completions/min_length": 21.0,
|
|
"completions/min_terminated_length": 21.0,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 874981913.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 175
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99130859375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1212.0,
|
|
"completions/mean_length": 1526.4693359375,
|
|
"completions/mean_terminated_length": 425.4572448730469,
|
|
"completions/min_length": 17.8,
|
|
"completions/min_terminated_length": 17.8,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 905799583.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 180
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99072265625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1469.0,
|
|
"completions/mean_length": 1526.5130859375,
|
|
"completions/mean_terminated_length": 515.5063232421875,
|
|
"completions/min_length": 26.0,
|
|
"completions/min_terminated_length": 26.0,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 936598789.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 185
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99072265625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1396.6,
|
|
"completions/mean_length": 1525.85185546875,
|
|
"completions/mean_terminated_length": 447.1800231933594,
|
|
"completions/min_length": 31.2,
|
|
"completions/min_terminated_length": 31.2,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 967223000.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 190
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99111328125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1440.8,
|
|
"completions/mean_length": 1527.06923828125,
|
|
"completions/mean_terminated_length": 509.2791809082031,
|
|
"completions/min_length": 13.0,
|
|
"completions/min_terminated_length": 13.0,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 998204093.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 195
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99365234375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1235.8,
|
|
"completions/mean_length": 1528.4357421875,
|
|
"completions/mean_terminated_length": 320.70795288085935,
|
|
"completions/min_length": 15.0,
|
|
"completions/min_terminated_length": 15.0,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1029197963.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_completions/clipped_ratio": 0.9900323275862069,
|
|
"eval_completions/max_length": 1536.0,
|
|
"eval_completions/max_terminated_length": 315.75,
|
|
"eval_completions/mean_length": 1524.6064758300781,
|
|
"eval_completions/mean_terminated_length": 265.1666717529297,
|
|
"eval_completions/min_length": 557.0,
|
|
"eval_completions/min_terminated_length": 173.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1029197963.0,
|
|
"eval_reward": 0.0,
|
|
"eval_reward_std": 0.0,
|
|
"eval_rewards/accuracy_reward": 0.0,
|
|
"eval_rewards/brier_reward": 0.0,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.0,
|
|
"eval_rewards/format_reward": 0.0,
|
|
"eval_rewards/frontier_aurc_reward": 0.0,
|
|
"eval_rewards/frontier_coverage_1": 0.0,
|
|
"eval_rewards/frontier_coverage_10": 0.0,
|
|
"eval_rewards/frontier_coverage_15": 0.0,
|
|
"eval_rewards/frontier_coverage_20": 0.0,
|
|
"eval_rewards/frontier_coverage_25": 0.0,
|
|
"eval_rewards/frontier_coverage_5": 0.0,
|
|
"eval_rewards/frontier_ece_reward": 0.0,
|
|
"eval_runtime": 74.0994,
|
|
"eval_samples_per_second": 6.748,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/advantage_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_std": 0.0,
|
|
"eval_signal/advantage_std": 0.0,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/group_std_mean": 0.0,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.054,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"step": 200,
|
|
"train_probe_completions/clipped_ratio": 0.9900323275862069,
|
|
"train_probe_completions/max_length": 1536.0,
|
|
"train_probe_completions/max_terminated_length": 444.75,
|
|
"train_probe_completions/mean_length": 1524.4424743652344,
|
|
"train_probe_completions/mean_terminated_length": 341.5,
|
|
"train_probe_completions/min_length": 238.25,
|
|
"train_probe_completions/min_terminated_length": 238.25,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 1029197963.0,
|
|
"train_probe_reward": 0.0,
|
|
"train_probe_reward_std": 0.0,
|
|
"train_probe_rewards/accuracy_reward": 0.0,
|
|
"train_probe_rewards/brier_reward": 0.0,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
|
|
"train_probe_rewards/format_reward": 0.0,
|
|
"train_probe_rewards/frontier_aurc_reward": 0.0,
|
|
"train_probe_rewards/frontier_coverage_1": 0.0,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0,
|
|
"train_probe_rewards/frontier_coverage_15": 0.0,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0,
|
|
"train_probe_rewards/frontier_coverage_25": 0.0,
|
|
"train_probe_rewards/frontier_coverage_5": 0.0,
|
|
"train_probe_rewards/frontier_ece_reward": 0.0,
|
|
"train_probe_runtime": 70.6463,
|
|
"train_probe_samples_per_second": 7.078,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.0,
|
|
"train_probe_signal/advantage_std": 0.0,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_steps_per_second": 0.057
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99091796875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1409.4,
|
|
"completions/mean_length": 1526.26650390625,
|
|
"completions/mean_terminated_length": 446.5516021728516,
|
|
"completions/min_length": 15.4,
|
|
"completions/min_terminated_length": 15.4,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1059683476.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 205
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99345703125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1140.0,
|
|
"completions/mean_length": 1528.49189453125,
|
|
"completions/mean_terminated_length": 370.8636016845703,
|
|
"completions/min_length": 45.0,
|
|
"completions/min_terminated_length": 45.0,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1090248673.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 210
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99228515625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1345.6,
|
|
"completions/mean_length": 1527.9634765625,
|
|
"completions/mean_terminated_length": 482.64027709960936,
|
|
"completions/min_length": 23.2,
|
|
"completions/min_terminated_length": 23.2,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1120848939.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 215
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99169921875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1463.8,
|
|
"completions/mean_length": 1527.13515625,
|
|
"completions/mean_terminated_length": 466.1702087402344,
|
|
"completions/min_length": 21.6,
|
|
"completions/min_terminated_length": 21.6,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1151352947.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 220
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9904296875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1260.2,
|
|
"completions/mean_length": 1525.028515625,
|
|
"completions/mean_terminated_length": 393.17090759277346,
|
|
"completions/min_length": 18.8,
|
|
"completions/min_terminated_length": 18.8,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1181979095.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 225
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.992578125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1274.4,
|
|
"completions/mean_length": 1527.6798828125,
|
|
"completions/mean_terminated_length": 406.5520385742187,
|
|
"completions/min_length": 25.2,
|
|
"completions/min_terminated_length": 25.2,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1212562121.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 230
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99248046875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1376.4,
|
|
"completions/mean_length": 1527.54853515625,
|
|
"completions/mean_terminated_length": 413.2706298828125,
|
|
"completions/min_length": 26.2,
|
|
"completions/min_terminated_length": 26.2,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1243431418.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 235
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.991015625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1362.2,
|
|
"completions/mean_length": 1525.55625,
|
|
"completions/mean_terminated_length": 372.6120971679687,
|
|
"completions/min_length": 14.2,
|
|
"completions/min_terminated_length": 14.2,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1273985818.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 240
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99306640625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1481.8,
|
|
"completions/mean_length": 1529.205078125,
|
|
"completions/mean_terminated_length": 549.9662841796875,
|
|
"completions/min_length": 33.8,
|
|
"completions/min_terminated_length": 33.8,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1304819246.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 245
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99208984375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1298.6,
|
|
"completions/mean_length": 1526.48974609375,
|
|
"completions/mean_terminated_length": 337.9350891113281,
|
|
"completions/min_length": 26.8,
|
|
"completions/min_terminated_length": 26.8,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1335461061.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_completions/clipped_ratio": 0.990234375,
|
|
"eval_completions/max_length": 1536.0,
|
|
"eval_completions/max_terminated_length": 590.75,
|
|
"eval_completions/mean_length": 1527.345703125,
|
|
"eval_completions/mean_terminated_length": 491.875,
|
|
"eval_completions/min_length": 777.0,
|
|
"eval_completions/min_terminated_length": 393.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1335461061.0,
|
|
"eval_reward": 0.0,
|
|
"eval_reward_std": 0.0,
|
|
"eval_rewards/accuracy_reward": 0.0,
|
|
"eval_rewards/brier_reward": 0.0,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.0,
|
|
"eval_rewards/format_reward": 0.0,
|
|
"eval_rewards/frontier_aurc_reward": 0.0,
|
|
"eval_rewards/frontier_coverage_1": 0.0,
|
|
"eval_rewards/frontier_coverage_10": 0.0,
|
|
"eval_rewards/frontier_coverage_15": 0.0,
|
|
"eval_rewards/frontier_coverage_20": 0.0,
|
|
"eval_rewards/frontier_coverage_25": 0.0,
|
|
"eval_rewards/frontier_coverage_5": 0.0,
|
|
"eval_rewards/frontier_ece_reward": 0.0,
|
|
"eval_runtime": 73.9291,
|
|
"eval_samples_per_second": 6.763,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/advantage_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_std": 0.0,
|
|
"eval_signal/advantage_std": 0.0,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/group_std_mean": 0.0,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.054,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"step": 250,
|
|
"train_probe_completions/clipped_ratio": 0.9861260775862069,
|
|
"train_probe_completions/max_length": 1536.0,
|
|
"train_probe_completions/max_terminated_length": 655.75,
|
|
"train_probe_completions/mean_length": 1522.4951477050781,
|
|
"train_probe_completions/mean_terminated_length": 420.75000762939453,
|
|
"train_probe_completions/min_length": 643.0,
|
|
"train_probe_completions/min_terminated_length": 259.0,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 1335461061.0,
|
|
"train_probe_reward": 0.0,
|
|
"train_probe_reward_std": 0.0,
|
|
"train_probe_rewards/accuracy_reward": 0.0,
|
|
"train_probe_rewards/brier_reward": 0.0,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
|
|
"train_probe_rewards/format_reward": 0.0,
|
|
"train_probe_rewards/frontier_aurc_reward": 0.0,
|
|
"train_probe_rewards/frontier_coverage_1": 0.0,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0,
|
|
"train_probe_rewards/frontier_coverage_15": 0.0,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0,
|
|
"train_probe_rewards/frontier_coverage_25": 0.0,
|
|
"train_probe_rewards/frontier_coverage_5": 0.0,
|
|
"train_probe_rewards/frontier_ece_reward": 0.0,
|
|
"train_probe_runtime": 73.7012,
|
|
"train_probe_samples_per_second": 6.784,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.0,
|
|
"train_probe_signal/advantage_std": 0.0,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_steps_per_second": 0.054
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99208984375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1341.0,
|
|
"completions/mean_length": 1527.38896484375,
|
|
"completions/mean_terminated_length": 444.1418090820313,
|
|
"completions/min_length": 21.8,
|
|
"completions/min_terminated_length": 21.8,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1366200692.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 255
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99091796875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1425.6,
|
|
"completions/mean_length": 1526.38564453125,
|
|
"completions/mean_terminated_length": 473.8236938476563,
|
|
"completions/min_length": 20.6,
|
|
"completions/min_terminated_length": 20.6,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1396839233.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 260
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99111328125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1394.4,
|
|
"completions/mean_length": 1525.48974609375,
|
|
"completions/mean_terminated_length": 345.88324584960935,
|
|
"completions/min_length": 16.0,
|
|
"completions/min_terminated_length": 16.0,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1427474616.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 265
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99072265625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1322.0,
|
|
"completions/mean_length": 1525.17509765625,
|
|
"completions/mean_terminated_length": 365.7697448730469,
|
|
"completions/min_length": 25.4,
|
|
"completions/min_terminated_length": 25.4,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1458079225.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 270
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99296875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1370.4,
|
|
"completions/mean_length": 1528.1609375,
|
|
"completions/mean_terminated_length": 439.784326171875,
|
|
"completions/min_length": 17.0,
|
|
"completions/min_terminated_length": 17.0,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1488874665.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 275
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.991015625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1472.0,
|
|
"completions/mean_length": 1526.5763671875,
|
|
"completions/mean_terminated_length": 484.0293884277344,
|
|
"completions/min_length": 18.6,
|
|
"completions/min_terminated_length": 18.6,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1519617655.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 280
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9912109375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1358.6,
|
|
"completions/mean_length": 1526.38759765625,
|
|
"completions/mean_terminated_length": 452.73790283203124,
|
|
"completions/min_length": 30.6,
|
|
"completions/min_terminated_length": 30.6,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1550299160.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 285
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9921875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1365.0,
|
|
"completions/mean_length": 1528.1560546875,
|
|
"completions/mean_terminated_length": 532.4930725097656,
|
|
"completions/min_length": 28.4,
|
|
"completions/min_terminated_length": 28.4,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1580974294.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 290
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9904296875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1427.2,
|
|
"completions/mean_length": 1525.52470703125,
|
|
"completions/mean_terminated_length": 438.5868408203125,
|
|
"completions/min_length": 19.4,
|
|
"completions/min_terminated_length": 19.4,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1611571091.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 295
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.98994140625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1269.6,
|
|
"completions/mean_length": 1524.29560546875,
|
|
"completions/mean_terminated_length": 369.2761505126953,
|
|
"completions/min_length": 7.4,
|
|
"completions/min_terminated_length": 7.4,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1642120198.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_completions/clipped_ratio": 0.9878771551724138,
|
|
"eval_completions/max_length": 1536.0,
|
|
"eval_completions/max_terminated_length": 676.0,
|
|
"eval_completions/mean_length": 1527.0393981933594,
|
|
"eval_completions/mean_terminated_length": 608.4583435058594,
|
|
"eval_completions/min_length": 946.0,
|
|
"eval_completions/min_terminated_length": 562.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1642120198.0,
|
|
"eval_reward": 0.0,
|
|
"eval_reward_std": 0.0,
|
|
"eval_rewards/accuracy_reward": 0.0,
|
|
"eval_rewards/brier_reward": 0.0,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.0,
|
|
"eval_rewards/format_reward": 0.0,
|
|
"eval_rewards/frontier_aurc_reward": 0.0,
|
|
"eval_rewards/frontier_coverage_1": 0.0,
|
|
"eval_rewards/frontier_coverage_10": 0.0,
|
|
"eval_rewards/frontier_coverage_15": 0.0,
|
|
"eval_rewards/frontier_coverage_20": 0.0,
|
|
"eval_rewards/frontier_coverage_25": 0.0,
|
|
"eval_rewards/frontier_coverage_5": 0.0,
|
|
"eval_rewards/frontier_ece_reward": 0.0,
|
|
"eval_runtime": 76.0588,
|
|
"eval_samples_per_second": 6.574,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/advantage_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"eval_signal/advantage_pre_scale_std": 0.0,
|
|
"eval_signal/advantage_std": 0.0,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/group_std_mean": 0.0,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.053,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"step": 300,
|
|
"train_probe_completions/clipped_ratio": 0.9978448275862069,
|
|
"train_probe_completions/max_length": 1536.0,
|
|
"train_probe_completions/max_terminated_length": 18.25,
|
|
"train_probe_completions/mean_length": 1532.8469848632812,
|
|
"train_probe_completions/mean_terminated_length": 18.25,
|
|
"train_probe_completions/min_length": 1170.25,
|
|
"train_probe_completions/min_terminated_length": 18.25,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 1642120198.0,
|
|
"train_probe_reward": 0.0,
|
|
"train_probe_reward_std": 0.0,
|
|
"train_probe_rewards/accuracy_reward": 0.0,
|
|
"train_probe_rewards/brier_reward": 0.0,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.0,
|
|
"train_probe_rewards/format_reward": 0.0,
|
|
"train_probe_rewards/frontier_aurc_reward": 0.0,
|
|
"train_probe_rewards/frontier_coverage_1": 0.0,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0,
|
|
"train_probe_rewards/frontier_coverage_15": 0.0,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0,
|
|
"train_probe_rewards/frontier_coverage_25": 0.0,
|
|
"train_probe_rewards/frontier_coverage_5": 0.0,
|
|
"train_probe_rewards/frontier_ece_reward": 0.0,
|
|
"train_probe_runtime": 72.6093,
|
|
"train_probe_samples_per_second": 6.886,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.0,
|
|
"train_probe_signal/advantage_std": 0.0,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_steps_per_second": 0.055
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99326171875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1463.8,
|
|
"completions/mean_length": 1529.2005859375,
|
|
"completions/mean_terminated_length": 520.1449768066407,
|
|
"completions/min_length": 33.4,
|
|
"completions/min_terminated_length": 33.4,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1672640332.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 305
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.99375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1262.0,
|
|
"completions/mean_length": 1529.35078125,
|
|
"completions/mean_terminated_length": 473.9320007324219,
|
|
"completions/min_length": 45.4,
|
|
"completions/min_terminated_length": 45.4,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1703429364.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 310
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.9893574617346939,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1243.0,
|
|
"completions/mean_length": 1523.6888427734375,
|
|
"completions/mean_terminated_length": 363.3791961669922,
|
|
"completions/min_length": 24.5,
|
|
"completions/min_terminated_length": 24.5,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1715682258.0,
|
|
"reward": 0.0,
|
|
"reward_std": 0.0,
|
|
"rewards/accuracy_reward": 0.0,
|
|
"rewards/brier_reward": 0.0,
|
|
"rewards/confidence_uniqueness_reward": 0.0,
|
|
"rewards/format_reward": 0.0,
|
|
"rewards/frontier_aurc_reward": 0.0,
|
|
"rewards/frontier_coverage_1": 0.0,
|
|
"rewards/frontier_coverage_10": 0.0,
|
|
"rewards/frontier_coverage_15": 0.0,
|
|
"rewards/frontier_coverage_20": 0.0,
|
|
"rewards/frontier_coverage_25": 0.0,
|
|
"rewards/frontier_coverage_5": 0.0,
|
|
"rewards/frontier_ece_reward": 0.0,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/group_std_mean": 0.0,
|
|
"signal/accuracy_reward/group_zero_std_frac": 1.0,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/advantage_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0,
|
|
"signal/advantage_pre_scale_std": 0.0,
|
|
"signal/advantage_std": 0.0,
|
|
"signal/brier_reward/centered_abs_mean": 0.0,
|
|
"signal/brier_reward/group_std_mean": 0.0,
|
|
"signal/brier_reward/group_zero_std_frac": 1.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 1.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 1.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.007585880621217,
|
|
"train_runtime": 111863.6752,
|
|
"train_samples_per_second": 0.179,
|
|
"train_steps_per_second": 0.003
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1715682258,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|