{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6364776407113771, "calibration/batch_distribution_entropy": 0.6455862671251419, "calibration/confidence_entropy": 0.3430452682301779, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4929321511720823, "calibration/mean_confidence": 0.7950546994723895, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 1504.4, "completions/max_terminated_length": 1504.4, "completions/mean_length": 214.4884765625, "completions/mean_terminated_length": 223.2016174316406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.04218404367566109, "learning_rate": 3.1249999999999997e-07, "loss": 0.0088, "num_tokens": 17040394.0, "reward": 0.7162060260772705, "reward_std": 0.6058708906173706, "rewards/accgated_coverage_0": 0.3019547939300537, "rewards/accgated_coverage_1": 0.3019547939300537, "rewards/accgated_coverage_10": 0.3019547939300537, "rewards/accgated_coverage_15": 0.3019547939300537, "rewards/accgated_coverage_20": 0.3019547939300537, "rewards/accgated_coverage_25": 0.3019547939300537, "rewards/accgated_coverage_5": 0.3019547939300537, "rewards/accuracy_reward": 0.2205078125, "rewards/brier_reward": 0.3744439840316772, "rewards/confidence_uniqueness_reward": 0.4882001519203186, "rewards/format_reward": 0.67841796875, "rewards/frontier_aurc_reward": 0.3019547939300537, "rewards/frontier_ece_reward": 0.3019547939300537, "rewards/frontier_entropy_batch_reward": -0.6485954165458679, "signal/accgated_coverage_0/centered_abs_mean": 0.2956149399280548, "signal/accgated_coverage_0/group_std_mean": 0.3455429673194885, "signal/accgated_coverage_0/group_zero_std_frac": 0.003125, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_1/centered_abs_mean": 0.2956149399280548, "signal/accgated_coverage_1/group_std_mean": 0.3455429673194885, "signal/accgated_coverage_1/group_zero_std_frac": 0.003125, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_10/centered_abs_mean": 0.2956149399280548, "signal/accgated_coverage_10/group_std_mean": 0.3455429673194885, "signal/accgated_coverage_10/group_zero_std_frac": 0.003125, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_15/centered_abs_mean": 0.2956149399280548, "signal/accgated_coverage_15/group_std_mean": 0.3455429673194885, "signal/accgated_coverage_15/group_zero_std_frac": 0.003125, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_20/centered_abs_mean": 0.2956149399280548, "signal/accgated_coverage_20/group_std_mean": 0.3455429673194885, "signal/accgated_coverage_20/group_zero_std_frac": 0.003125, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_25/centered_abs_mean": 0.2956149399280548, "signal/accgated_coverage_25/group_std_mean": 0.3455429673194885, "signal/accgated_coverage_25/group_zero_std_frac": 0.003125, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_5/centered_abs_mean": 0.2956149399280548, "signal/accgated_coverage_5/group_std_mean": 0.3455429673194885, "signal/accgated_coverage_5/group_zero_std_frac": 0.003125, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.029561495035886766, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.029561495035886766, "signal/accuracy_reward/centered_abs_mean": 0.24259033203125, "signal/accuracy_reward/group_std_mean": 0.28361558318138125, "signal/accuracy_reward/group_zero_std_frac": 0.31875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.121295166015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.121295166015625, "signal/advantage_abs_mean": 0.5158906996250152, "signal/advantage_pre_scale_abs_mean": 0.5158906996250152, "signal/advantage_pre_scale_std": 0.6248098611831665, "signal/advantage_std": 0.6248098611831665, "signal/brier_reward/centered_abs_mean": 0.3227140247821808, "signal/brier_reward/group_std_mean": 0.36719409823417665, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.032271404191851615, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.032271404191851615, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.3028075397014618, "signal/confidence_uniqueness_reward/group_std_mean": 0.3515664279460907, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03028075359761715, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03028075359761715, "signal/format_reward/centered_abs_mean": 0.407098388671875, "signal/format_reward/group_std_mean": 0.4557113587856293, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2035491943359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2035491943359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.2956149399280548, "signal/frontier_aurc_reward/group_std_mean": 0.3455429673194885, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036951868794858457, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036951868794858457, "signal/frontier_ece_reward/centered_abs_mean": 0.2956149399280548, "signal/frontier_ece_reward/group_std_mean": 0.3455429673194885, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029561495035886766, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029561495035886766, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4288070142269135, "signal/frontier_entropy_batch_reward/group_std_mean": 0.47342650294303895, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04288070127367973, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04288070127367973, "step": 5 }, { "calibration/aurc": 0.6784755921026754, "calibration/batch_distribution_entropy": 0.6632685785940718, "calibration/confidence_entropy": 0.34600416614606927, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5261398694458579, "calibration/mean_confidence": 0.7872892063716558, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03779296875, "completions/max_length": 1490.4, "completions/max_terminated_length": 1490.4, "completions/mean_length": 204.46240234375, "completions/mean_terminated_length": 212.52877502441407, "completions/min_length": 0.0, "completions/min_terminated_length": 1.8, "epoch": 0.032, "grad_norm": 0.053165026009082794, "learning_rate": 6.249999999999999e-07, "loss": 0.0101, "num_tokens": 34234441.0, "reward": 0.7344912886619568, "reward_std": 0.5754643559455872, "rewards/accgated_coverage_0": 0.3019598960876465, "rewards/accgated_coverage_1": 0.3019598960876465, "rewards/accgated_coverage_10": 0.3019598960876465, "rewards/accgated_coverage_15": 0.3019598960876465, "rewards/accgated_coverage_20": 0.3019598960876465, "rewards/accgated_coverage_25": 0.3019598960876465, "rewards/accgated_coverage_5": 0.3019598960876465, "rewards/accuracy_reward": 0.21142578125, "rewards/brier_reward": 0.38320069313049315, "rewards/confidence_uniqueness_reward": 0.5189769625663757, "rewards/format_reward": 0.7234375, "rewards/frontier_aurc_reward": 0.3019598960876465, "rewards/frontier_ece_reward": 0.3019598960876465, "rewards/frontier_entropy_batch_reward": -0.685005521774292, "signal/accgated_coverage_0/centered_abs_mean": 0.2811114966869354, "signal/accgated_coverage_0/group_std_mean": 0.33583817481994627, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_1/centered_abs_mean": 0.2811114966869354, "signal/accgated_coverage_1/group_std_mean": 0.33583817481994627, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_10/centered_abs_mean": 0.2811114966869354, "signal/accgated_coverage_10/group_std_mean": 0.33583817481994627, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_15/centered_abs_mean": 0.2811114966869354, "signal/accgated_coverage_15/group_std_mean": 0.33583817481994627, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_20/centered_abs_mean": 0.2811114966869354, "signal/accgated_coverage_20/group_std_mean": 0.33583817481994627, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_25/centered_abs_mean": 0.2811114966869354, "signal/accgated_coverage_25/group_std_mean": 0.33583817481994627, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_5/centered_abs_mean": 0.2811114966869354, "signal/accgated_coverage_5/group_std_mean": 0.33583817481994627, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.028111150488257408, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.028111150488257408, "signal/accuracy_reward/centered_abs_mean": 0.226324462890625, "signal/accuracy_reward/group_std_mean": 0.27484233379364015, "signal/accuracy_reward/group_zero_std_frac": 0.30625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1131622314453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1131622314453125, "signal/advantage_abs_mean": 0.4752094566822052, "signal/advantage_pre_scale_abs_mean": 0.4752094566822052, "signal/advantage_pre_scale_std": 0.5934478282928467, "signal/advantage_std": 0.5934478282928467, "signal/brier_reward/centered_abs_mean": 0.30669367909431455, "signal/brier_reward/group_std_mean": 0.35490121245384215, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03066936805844307, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03066936805844307, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2765663981437683, "signal/confidence_uniqueness_reward/group_std_mean": 0.33613392114639284, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027656640484929086, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027656640484929086, "signal/format_reward/centered_abs_mean": 0.3680908203125, "signal/format_reward/group_std_mean": 0.4318098545074463, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18404541015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.18404541015625, "signal/frontier_aurc_reward/centered_abs_mean": 0.2811114966869354, "signal/frontier_aurc_reward/group_std_mean": 0.33583817481994627, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003513893811032176, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003513893811032176, "signal/frontier_ece_reward/centered_abs_mean": 0.2811114966869354, "signal/frontier_ece_reward/group_std_mean": 0.33583817481994627, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.028111150488257408, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.028111150488257408, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4002421200275421, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4575047969818115, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04002421200275421, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04002421200275421, "step": 10 }, { "calibration/aurc": 0.5994437029504185, "calibration/batch_distribution_entropy": 0.6537833231091288, "calibration/buffer_distribution_entropy": 0.670862995600238, "calibration/confidence_entropy": 0.35392652781636763, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.45825798508943094, "calibration/mean_confidence": 0.7940574361042888, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0169921875, "completions/max_length": 1496.6, "completions/max_terminated_length": 1496.6, "completions/mean_length": 177.0701171875, "completions/mean_terminated_length": 180.23756408691406, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.048, "grad_norm": 0.00785356666892767, "learning_rate": 9.374999999999999e-07, "loss": 0.0133, "num_tokens": 51096375.0, "reward": 0.8466886758804322, "reward_std": 0.44576832354068757, "rewards/accgated_coverage_0": 0.30328094847500325, "rewards/accgated_coverage_1": 0.30328094847500325, "rewards/accgated_coverage_10": 0.30328094847500325, "rewards/accgated_coverage_15": 0.30328094847500325, "rewards/accgated_coverage_20": 0.30328094847500325, "rewards/accgated_coverage_25": 0.30328094847500325, "rewards/accgated_coverage_5": 0.30328094847500325, "rewards/accuracy_reward": 0.26787109375, "rewards/brier_reward": 0.48309295177459716, "rewards/confidence_uniqueness_reward": 0.6408498525619507, "rewards/format_reward": 0.8763671875, "rewards/frontier_aurc_reward": 0.30000464636832475, "rewards/frontier_ece_reward": 0.2886372864246368, "rewards/frontier_entropy_batch_reward": -0.8273520708084107, "signal/accgated_coverage_0/centered_abs_mean": 0.21752286404371263, "signal/accgated_coverage_0/group_std_mean": 0.2625477723777294, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_1/centered_abs_mean": 0.21752286404371263, "signal/accgated_coverage_1/group_std_mean": 0.2625477723777294, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_10/centered_abs_mean": 0.21752286404371263, "signal/accgated_coverage_10/group_std_mean": 0.2625477723777294, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_15/centered_abs_mean": 0.21752286404371263, "signal/accgated_coverage_15/group_std_mean": 0.2625477723777294, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_20/centered_abs_mean": 0.21752286404371263, "signal/accgated_coverage_20/group_std_mean": 0.2625477723777294, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_25/centered_abs_mean": 0.21752286404371263, "signal/accgated_coverage_25/group_std_mean": 0.2625477723777294, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_5/centered_abs_mean": 0.21752286404371263, "signal/accgated_coverage_5/group_std_mean": 0.2625477723777294, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.021752287307754158, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.021752287307754158, "signal/accuracy_reward/centered_abs_mean": 0.195428466796875, "signal/accuracy_reward/group_std_mean": 0.24072909057140351, "signal/accuracy_reward/group_zero_std_frac": 0.38125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0977142333984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0977142333984375, "signal/advantage_abs_mean": 0.3535905122756958, "signal/advantage_pre_scale_abs_mean": 0.3535905122756958, "signal/advantage_pre_scale_std": 0.466570183634758, "signal/advantage_std": 0.466570183634758, "signal/brier_reward/centered_abs_mean": 0.2721236228942871, "signal/brier_reward/group_std_mean": 0.3262364029884338, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027212361991405486, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.027212361991405486, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20057708621025086, "signal/confidence_uniqueness_reward/group_std_mean": 0.2654131382703781, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020057709142565727, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020057709142565727, "signal/format_reward/centered_abs_mean": 0.19754638671875, "signal/format_reward/group_std_mean": 0.2995403289794922, "signal/format_reward/group_zero_std_frac": 0.053125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.098773193359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.098773193359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.21606770902872086, "signal/frontier_aurc_reward/group_std_mean": 0.2601821569725871, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0027008464734535665, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0027008464734535665, "signal/frontier_ece_reward/centered_abs_mean": 0.24298666417598724, "signal/frontier_ece_reward/group_std_mean": 0.29195126295089724, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024298667535185815, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024298667535185815, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26501129269599916, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37041118144989016, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026501129567623138, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026501129567623138, "step": 15 }, { "calibration/aurc": 0.5322075250234468, "calibration/batch_distribution_entropy": 0.7415808503894759, "calibration/buffer_distribution_entropy": 0.6724491622416343, "calibration/confidence_entropy": 0.3819189591260301, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3454595472695586, "calibration/mean_confidence": 0.7406530881585631, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0072265625, "completions/max_length": 1334.6, "completions/max_terminated_length": 1334.6, "completions/mean_length": 134.69384765625, "completions/mean_terminated_length": 135.69620513916016, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.064, "grad_norm": 0.031798213720321655, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 67394040.0, "reward": 0.7022075057029724, "reward_std": 0.20362389087677002, "rewards/accgated_coverage_0": 0.008948421757668256, "rewards/accgated_coverage_1": 0.008948421757668256, "rewards/accgated_coverage_10": 0.008948421757668256, "rewards/accgated_coverage_15": 0.008948421757668256, "rewards/accgated_coverage_20": 0.008948421757668256, "rewards/accgated_coverage_25": 0.008948421757668256, "rewards/accgated_coverage_5": 0.008948421757668256, "rewards/accuracy_reward": 0.340625, "rewards/brier_reward": 0.5920320749282837, "rewards/confidence_uniqueness_reward": 0.7591738820075988, "rewards/format_reward": 0.96904296875, "rewards/frontier_aurc_reward": -0.006775558087974786, "rewards/frontier_ece_reward": -0.043132821563631296, "rewards/frontier_entropy_batch_reward": -0.8961299777030944, "signal/accgated_coverage_0/centered_abs_mean": 0.01699206493794918, "signal/accgated_coverage_0/group_std_mean": 0.02561163380742073, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_1/centered_abs_mean": 0.01699206493794918, "signal/accgated_coverage_1/group_std_mean": 0.02561163380742073, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_10/centered_abs_mean": 0.01699206493794918, "signal/accgated_coverage_10/group_std_mean": 0.02561163380742073, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_15/centered_abs_mean": 0.01699206493794918, "signal/accgated_coverage_15/group_std_mean": 0.02561163380742073, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_20/centered_abs_mean": 0.01699206493794918, "signal/accgated_coverage_20/group_std_mean": 0.02561163380742073, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_25/centered_abs_mean": 0.01699206493794918, "signal/accgated_coverage_25/group_std_mean": 0.02561163380742073, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_5/centered_abs_mean": 0.01699206493794918, "signal/accgated_coverage_5/group_std_mean": 0.02561163380742073, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0016992064891383051, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0016992064891383051, "signal/accuracy_reward/centered_abs_mean": 0.2025634765625, "signal/accuracy_reward/group_std_mean": 0.2514846593141556, "signal/accuracy_reward/group_zero_std_frac": 0.346875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10128173828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10128173828125, "signal/advantage_abs_mean": 0.15547150671482085, "signal/advantage_pre_scale_abs_mean": 0.15547150671482085, "signal/advantage_pre_scale_std": 0.2213201105594635, "signal/advantage_std": 0.2213201105594635, "signal/brier_reward/centered_abs_mean": 0.2514127492904663, "signal/brier_reward/group_std_mean": 0.3071712851524353, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02514127641916275, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02514127641916275, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12233100682497025, "signal/confidence_uniqueness_reward/group_std_mean": 0.16284309923648835, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012233100831508636, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012233100831508636, "signal/format_reward/centered_abs_mean": 0.054034423828125, "signal/format_reward/group_std_mean": 0.11254389882087708, "signal/format_reward/group_zero_std_frac": 0.5, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0270172119140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0270172119140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.004748838301748037, "signal/frontier_aurc_reward/group_std_mean": 0.006619170308113098, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9360478917369616e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9360478917369616e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.1384577602148056, "signal/frontier_ece_reward/group_std_mean": 0.16538217663764954, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013845776021480561, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013845776021480561, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17491987645626067, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2958513736724854, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.065625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017491987720131875, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017491987720131875, "step": 20 }, { "calibration/aurc": 0.6534105005979708, "calibration/batch_distribution_entropy": 0.882906447603083, "calibration/buffer_distribution_entropy": 0.7222234773610909, "calibration/confidence_entropy": 0.4605512177454085, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3406275119390077, "calibration/mean_confidence": 0.629555657034802, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021484375, "completions/max_length": 879.2, "completions/max_terminated_length": 879.2, "completions/mean_length": 110.02529296875, "completions/mean_terminated_length": 110.26083374023438, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.08, "grad_norm": 0.03735971078276634, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 83453851.0, "reward": 0.7374543428421021, "reward_std": 0.17404116094112396, "rewards/accgated_coverage_0": 0.010930617339909077, "rewards/accgated_coverage_1": 0.010930617339909077, "rewards/accgated_coverage_10": 0.010930617339909077, "rewards/accgated_coverage_15": 0.010930617339909077, "rewards/accgated_coverage_20": 0.010930617339909077, "rewards/accgated_coverage_25": 0.010930617339909077, "rewards/accgated_coverage_5": 0.010930617339909077, "rewards/accuracy_reward": 0.34697265625, "rewards/brier_reward": 0.6479137420654297, "rewards/confidence_uniqueness_reward": 0.8465597629547119, "rewards/format_reward": 0.9888671875, "rewards/frontier_aurc_reward": -0.006063922494649887, "rewards/frontier_ece_reward": -0.035230358317494395, "rewards/frontier_entropy_batch_reward": -0.8396552681922913, "signal/accgated_coverage_0/centered_abs_mean": 0.025153553858399392, "signal/accgated_coverage_0/group_std_mean": 0.035369380936026575, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_1/centered_abs_mean": 0.025153553858399392, "signal/accgated_coverage_1/group_std_mean": 0.035369380936026575, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_10/centered_abs_mean": 0.025153553858399392, "signal/accgated_coverage_10/group_std_mean": 0.035369380936026575, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_15/centered_abs_mean": 0.025153553858399392, "signal/accgated_coverage_15/group_std_mean": 0.035369380936026575, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_20/centered_abs_mean": 0.025153553858399392, "signal/accgated_coverage_20/group_std_mean": 0.035369380936026575, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_25/centered_abs_mean": 0.025153553858399392, "signal/accgated_coverage_25/group_std_mean": 0.035369380936026575, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_5/centered_abs_mean": 0.025153553858399392, "signal/accgated_coverage_5/group_std_mean": 0.035369380936026575, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.002515355497598648, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.002515355497598648, "signal/accuracy_reward/centered_abs_mean": 0.193377685546875, "signal/accuracy_reward/group_std_mean": 0.2430298238992691, "signal/accuracy_reward/group_zero_std_frac": 0.35625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0966888427734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0966888427734375, "signal/advantage_abs_mean": 0.13480161428451537, "signal/advantage_pre_scale_abs_mean": 0.13480161428451537, "signal/advantage_pre_scale_std": 0.1890869230031967, "signal/advantage_std": 0.1890869230031967, "signal/brier_reward/centered_abs_mean": 0.24160752594470977, "signal/brier_reward/group_std_mean": 0.29405343532562256, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024160753190517425, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.024160753190517425, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07963932007551193, "signal/confidence_uniqueness_reward/group_std_mean": 0.11142729669809341, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007963932119309902, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007963932119309902, "signal/format_reward/centered_abs_mean": 0.0205322265625, "signal/format_reward/group_std_mean": 0.04784815683960915, "signal/format_reward/group_zero_std_frac": 0.765625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01026611328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01026611328125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003672349965199828, "signal/frontier_aurc_reward/group_std_mean": 0.005370978266000748, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.590437456499785e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.590437456499785e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.11837852597236634, "signal/frontier_ece_reward/group_std_mean": 0.14812108874320984, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01183785293251276, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01183785293251276, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2632958233356476, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4028609037399292, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026329582929611205, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026329582929611205, "step": 25 }, { "calibration/aurc": 0.6480277663972374, "calibration/batch_distribution_entropy": 0.9591338539033302, "calibration/buffer_distribution_entropy": 0.7915711200792763, "calibration/confidence_entropy": 0.5127234109549083, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2450127953620774, "calibration/mean_confidence": 0.4834098702409223, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021484375, "completions/max_length": 1057.2, "completions/max_terminated_length": 1057.2, "completions/mean_length": 107.9982421875, "completions/mean_terminated_length": 108.23075714111329, "completions/min_length": 0.0, "completions/min_terminated_length": 14.8, "epoch": 0.096, "grad_norm": 0.018712179735302925, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 99604361.0, "reward": 0.7748218059539795, "reward_std": 0.16123634278774263, "rewards/accgated_coverage_0": 0.01546512171626091, "rewards/accgated_coverage_1": 0.01546512171626091, "rewards/accgated_coverage_10": 0.01546512171626091, "rewards/accgated_coverage_15": 0.01546512171626091, "rewards/accgated_coverage_20": 0.01546512171626091, "rewards/accgated_coverage_25": 0.01546512171626091, "rewards/accgated_coverage_5": 0.01546512171626091, "rewards/accuracy_reward": 0.3490234375, "rewards/brier_reward": 0.7032750844955444, "rewards/confidence_uniqueness_reward": 0.9063532948493958, "rewards/format_reward": 0.99130859375, "rewards/frontier_aurc_reward": -0.005372885894030332, "rewards/frontier_ece_reward": -0.020505653135478495, "rewards/frontier_entropy_batch_reward": -0.650149130821228, "signal/accgated_coverage_0/centered_abs_mean": 0.033817265182733536, "signal/accgated_coverage_0/group_std_mean": 0.04317799136042595, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_1/centered_abs_mean": 0.033817265182733536, "signal/accgated_coverage_1/group_std_mean": 0.04317799136042595, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_10/centered_abs_mean": 0.033817265182733536, "signal/accgated_coverage_10/group_std_mean": 0.04317799136042595, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_15/centered_abs_mean": 0.033817265182733536, "signal/accgated_coverage_15/group_std_mean": 0.04317799136042595, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_20/centered_abs_mean": 0.033817265182733536, "signal/accgated_coverage_20/group_std_mean": 0.04317799136042595, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_25/centered_abs_mean": 0.033817265182733536, "signal/accgated_coverage_25/group_std_mean": 0.04317799136042595, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_5/centered_abs_mean": 0.033817265182733536, "signal/accgated_coverage_5/group_std_mean": 0.04317799136042595, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0033817265182733538, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0033817265182733538, "signal/accuracy_reward/centered_abs_mean": 0.18580322265625, "signal/accuracy_reward/group_std_mean": 0.2367357134819031, "signal/accuracy_reward/group_zero_std_frac": 0.359375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.092901611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.092901611328125, "signal/advantage_abs_mean": 0.12383366525173187, "signal/advantage_pre_scale_abs_mean": 0.12383366525173187, "signal/advantage_pre_scale_std": 0.17370418608188629, "signal/advantage_std": 0.17370418608188629, "signal/brier_reward/centered_abs_mean": 0.2232639193534851, "signal/brier_reward/group_std_mean": 0.27452114820480344, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0223263930529356, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0223263930529356, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05847673192620277, "signal/confidence_uniqueness_reward/group_std_mean": 0.08610113561153412, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005847673676908016, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005847673676908016, "signal/format_reward/centered_abs_mean": 0.015130615234375, "signal/format_reward/group_std_mean": 0.03428548686206341, "signal/format_reward/group_zero_std_frac": 0.834375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0075653076171875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0075653076171875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002610748796723783, "signal/frontier_aurc_reward/group_std_mean": 0.004314846731722355, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2634363742545246e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2634363742545246e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0914211854338646, "signal/frontier_ece_reward/group_std_mean": 0.12666151225566863, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009142118506133556, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009142118506133556, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4424427688121796, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5312122881412507, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04424427673220634, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04424427673220634, "step": 30 }, { "calibration/aurc": 0.5095629672072505, "calibration/batch_distribution_entropy": 0.9386988710165081, "calibration/buffer_distribution_entropy": 0.8634836316046473, "calibration/confidence_entropy": 0.5009593051567744, "calibration/coverage@0%": 0.001968503937007874, "calibration/coverage@1%": 0.001968503937007874, "calibration/coverage@10%": 0.001968503937007874, "calibration/coverage@15%": 0.001968503937007874, "calibration/coverage@20%": 0.001968503937007874, "calibration/coverage@25%": 0.0157728804609483, "calibration/coverage@30%": 0.037065337247045305, "calibration/coverage@5%": 0.001968503937007874, "calibration/ece": 0.18292966624226104, "calibration/mean_confidence": 0.37759446560343035, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0064453125, "completions/max_length": 1335.8, "completions/max_terminated_length": 1335.8, "completions/mean_length": 113.25283203125, "completions/mean_terminated_length": 113.99161376953126, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.112, "grad_norm": 0.05354702100157738, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 115873542.0, "reward": 0.8208463191986084, "reward_std": 0.15327103734016417, "rewards/accgated_coverage_0": 0.015279607055708766, "rewards/accgated_coverage_1": 0.015279607055708766, "rewards/accgated_coverage_10": 0.015279607055708766, "rewards/accgated_coverage_15": 0.015279607055708766, "rewards/accgated_coverage_20": 0.015279607055708766, "rewards/accgated_coverage_25": 0.015279607055708766, "rewards/accgated_coverage_5": 0.015279607055708766, "rewards/accuracy_reward": 0.38798828125, "rewards/brier_reward": 0.7118686318397522, "rewards/confidence_uniqueness_reward": 0.9270346283912658, "rewards/format_reward": 0.985546875, "rewards/frontier_aurc_reward": -0.004612564016133547, "rewards/frontier_ece_reward": -0.0035013286280445753, "rewards/frontier_entropy_batch_reward": -0.40099533796310427, "signal/accgated_coverage_0/centered_abs_mean": 0.05104095339775085, "signal/accgated_coverage_0/group_std_mean": 0.06317490637302399, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_1/centered_abs_mean": 0.05104095339775085, "signal/accgated_coverage_1/group_std_mean": 0.06317490637302399, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_10/centered_abs_mean": 0.05104095339775085, "signal/accgated_coverage_10/group_std_mean": 0.06317490637302399, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_15/centered_abs_mean": 0.05104095339775085, "signal/accgated_coverage_15/group_std_mean": 0.06317490637302399, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_20/centered_abs_mean": 0.05104095339775085, "signal/accgated_coverage_20/group_std_mean": 0.06317490637302399, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_25/centered_abs_mean": 0.05104095339775085, "signal/accgated_coverage_25/group_std_mean": 0.06317490637302399, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_5/centered_abs_mean": 0.05104095339775085, "signal/accgated_coverage_5/group_std_mean": 0.06317490637302399, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005104095302522182, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005104095302522182, "signal/accuracy_reward/centered_abs_mean": 0.186871337890625, "signal/accuracy_reward/group_std_mean": 0.2370339572429657, "signal/accuracy_reward/group_zero_std_frac": 0.3625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0934356689453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0934356689453125, "signal/advantage_abs_mean": 0.11395874172449112, "signal/advantage_pre_scale_abs_mean": 0.11395874172449112, "signal/advantage_pre_scale_std": 0.1690732568502426, "signal/advantage_std": 0.1690732568502426, "signal/brier_reward/centered_abs_mean": 0.21671865582466127, "signal/brier_reward/group_std_mean": 0.26835680603981016, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02167186588048935, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02167186588048935, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04837794005870819, "signal/confidence_uniqueness_reward/group_std_mean": 0.08344578742980957, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004837794043123722, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004837794043123722, "signal/format_reward/centered_abs_mean": 0.026171875, "signal/format_reward/group_std_mean": 0.05847979113459587, "signal/format_reward/group_zero_std_frac": 0.721875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0130859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0130859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.00165214529260993, "signal/frontier_aurc_reward/group_std_mean": 0.002674648817628622, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0651815793826246e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0651815793826246e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05707942098379135, "signal/frontier_ece_reward/group_std_mean": 0.08767256736755372, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005707942321896553, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005707942321896553, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4562400221824646, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5098948240280151, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04562400206923485, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04562400206923485, "step": 35 }, { "calibration/aurc": 0.5786534736820717, "calibration/batch_distribution_entropy": 0.9021460403906527, "calibration/buffer_distribution_entropy": 0.9127727634448434, "calibration/confidence_entropy": 0.4836999684154605, "calibration/coverage@0%": 0.001987385490975714, "calibration/coverage@1%": 0.001987385490975714, "calibration/coverage@10%": 0.001987385490975714, "calibration/coverage@15%": 0.003975457061552255, "calibration/coverage@20%": 0.003975457061552255, "calibration/coverage@25%": 0.004373071375667563, "calibration/coverage@30%": 0.009471110591353837, "calibration/coverage@5%": 0.001987385490975714, "calibration/ece": 0.1779325393409029, "calibration/mean_confidence": 0.3209674774972401, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1260.2, "completions/max_terminated_length": 1260.2, "completions/mean_length": 120.1251953125, "completions/mean_terminated_length": 121.06528930664062, "completions/min_length": 0.0, "completions/min_terminated_length": 28.6, "epoch": 0.128, "grad_norm": 0.045759644359350204, "learning_rate": 1e-06, "loss": -0.0088, "num_tokens": 132020296.0, "reward": 0.8203153371810913, "reward_std": 0.13453607708215715, "rewards/accgated_coverage_0": 0.017799985222518445, "rewards/accgated_coverage_1": 0.017799985222518445, "rewards/accgated_coverage_10": 0.017799985222518445, "rewards/accgated_coverage_15": 0.017799985222518445, "rewards/accgated_coverage_20": 0.017799985222518445, "rewards/accgated_coverage_25": 0.017799985222518445, "rewards/accgated_coverage_5": 0.017799985222518445, "rewards/accuracy_reward": 0.37900390625, "rewards/brier_reward": 0.7211938977241517, "rewards/confidence_uniqueness_reward": 0.9303524374961853, "rewards/format_reward": 0.9890625, "rewards/frontier_aurc_reward": -0.004417700413614512, "rewards/frontier_ece_reward": 0.00374011246021837, "rewards/frontier_entropy_batch_reward": -0.4165126860141754, "signal/accgated_coverage_0/centered_abs_mean": 0.04929931238293648, "signal/accgated_coverage_0/group_std_mean": 0.060933683067560196, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_1/centered_abs_mean": 0.04929931238293648, "signal/accgated_coverage_1/group_std_mean": 0.060933683067560196, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_10/centered_abs_mean": 0.04929931238293648, "signal/accgated_coverage_10/group_std_mean": 0.060933683067560196, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_15/centered_abs_mean": 0.04929931238293648, "signal/accgated_coverage_15/group_std_mean": 0.060933683067560196, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_20/centered_abs_mean": 0.04929931238293648, "signal/accgated_coverage_20/group_std_mean": 0.060933683067560196, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_25/centered_abs_mean": 0.04929931238293648, "signal/accgated_coverage_25/group_std_mean": 0.060933683067560196, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_5/centered_abs_mean": 0.04929931238293648, "signal/accgated_coverage_5/group_std_mean": 0.060933683067560196, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004929931275546551, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004929931275546551, "signal/accuracy_reward/centered_abs_mean": 0.173748779296875, "signal/accuracy_reward/group_std_mean": 0.22208267450332642, "signal/accuracy_reward/group_zero_std_frac": 0.390625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0868743896484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0868743896484375, "signal/advantage_abs_mean": 0.101476289331913, "signal/advantage_pre_scale_abs_mean": 0.101476289331913, "signal/advantage_pre_scale_std": 0.15157161951065062, "signal/advantage_std": 0.15157161951065062, "signal/brier_reward/centered_abs_mean": 0.20460852086544037, "signal/brier_reward/group_std_mean": 0.25686987340450285, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02046085223555565, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02046085223555565, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0362715695053339, "signal/confidence_uniqueness_reward/group_std_mean": 0.06078647375106812, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003627156838774681, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003627156838774681, "signal/format_reward/centered_abs_mean": 0.01942138671875, "signal/format_reward/group_std_mean": 0.041324655339121816, "signal/format_reward/group_zero_std_frac": 0.809375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009710693359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009710693359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014763909159228206, "signal/frontier_aurc_reward/group_std_mean": 0.002241973439231515, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.845488623075653e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.845488623075653e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.046713653951883316, "signal/frontier_ece_reward/group_std_mean": 0.07055558562278748, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004671365395188332, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004671365395188332, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42762730121612547, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4882843255996704, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04276273101568222, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04276273101568222, "step": 40 }, { "calibration/aurc": 0.421006288092185, "calibration/batch_distribution_entropy": 0.9715952995597832, "calibration/buffer_distribution_entropy": 0.9399097549752383, "calibration/confidence_entropy": 0.5176721474972316, "calibration/coverage@0%": 0.0011928429423459243, "calibration/coverage@1%": 0.0011928429423459243, "calibration/coverage@10%": 0.0011928429423459243, "calibration/coverage@15%": 0.0011928429423459243, "calibration/coverage@20%": 0.0031888509263778607, "calibration/coverage@25%": 0.11491847319277945, "calibration/coverage@30%": 0.18768189267588084, "calibration/coverage@5%": 0.0011928429423459243, "calibration/ece": 0.22219560625757112, "calibration/mean_confidence": 0.42302083297278054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0109375, "completions/max_length": 1318.2, "completions/max_terminated_length": 1318.2, "completions/mean_length": 122.58076171875, "completions/mean_terminated_length": 123.93398590087891, "completions/min_length": 0.0, "completions/min_terminated_length": 5.2, "epoch": 0.144, "grad_norm": 0.03618094325065613, "learning_rate": 1e-06, "loss": -0.0104, "num_tokens": 148225955.0, "reward": 0.8666697382926941, "reward_std": 0.15474329888820648, "rewards/accgated_coverage_0": 0.004803288914263249, "rewards/accgated_coverage_1": 0.004803288914263249, "rewards/accgated_coverage_10": 0.004803288914263249, "rewards/accgated_coverage_15": 0.004803288914263249, "rewards/accgated_coverage_20": 0.004803288914263249, "rewards/accgated_coverage_25": 0.004803288914263249, "rewards/accgated_coverage_5": 0.004803288914263249, "rewards/accuracy_reward": 0.46796875, "rewards/brier_reward": 0.6943991541862488, "rewards/confidence_uniqueness_reward": 0.9354530334472656, "rewards/format_reward": 0.98427734375, "rewards/frontier_aurc_reward": -0.004057955369353295, "rewards/frontier_ece_reward": 0.0027345028007403015, "rewards/frontier_entropy_batch_reward": -0.2602355360984802, "signal/accgated_coverage_0/centered_abs_mean": 0.0669349491596222, "signal/accgated_coverage_0/group_std_mean": 0.08391269594430924, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_1/centered_abs_mean": 0.0669349491596222, "signal/accgated_coverage_1/group_std_mean": 0.08391269594430924, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_10/centered_abs_mean": 0.0669349491596222, "signal/accgated_coverage_10/group_std_mean": 0.08391269594430924, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_15/centered_abs_mean": 0.0669349491596222, "signal/accgated_coverage_15/group_std_mean": 0.08391269594430924, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_20/centered_abs_mean": 0.0669349491596222, "signal/accgated_coverage_20/group_std_mean": 0.08391269594430924, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_25/centered_abs_mean": 0.0669349491596222, "signal/accgated_coverage_25/group_std_mean": 0.08391269594430924, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_5/centered_abs_mean": 0.0669349491596222, "signal/accgated_coverage_5/group_std_mean": 0.08391269594430924, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006693494878709317, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006693494878709317, "signal/accuracy_reward/centered_abs_mean": 0.17861328125, "signal/accuracy_reward/group_std_mean": 0.2309749722480774, "signal/accuracy_reward/group_zero_std_frac": 0.359375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.089306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.089306640625, "signal/advantage_abs_mean": 0.11762301176786423, "signal/advantage_pre_scale_abs_mean": 0.11762301176786423, "signal/advantage_pre_scale_std": 0.16891648769378662, "signal/advantage_std": 0.16891648769378662, "signal/brier_reward/centered_abs_mean": 0.21884905993938447, "signal/brier_reward/group_std_mean": 0.2688636898994446, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02188490703701973, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02188490703701973, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.038263066112995146, "signal/confidence_uniqueness_reward/group_std_mean": 0.06704763397574424, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038263065740466117, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038263065740466117, "signal/format_reward/centered_abs_mean": 0.027459716796875, "signal/format_reward/group_std_mean": 0.05488141924142838, "signal/format_reward/group_zero_std_frac": 0.759375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0137298583984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0137298583984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020087390905246137, "signal/frontier_aurc_reward/group_std_mean": 0.0029508148785680533, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5109239504672588e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5109239504672588e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.052871844917535785, "signal/frontier_ece_reward/group_std_mean": 0.07742156088352203, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005287184566259384, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005287184566259384, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3467997610569, "signal/frontier_entropy_batch_reward/group_std_mean": 0.420889800786972, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034679976850748064, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034679976850748064, "step": 45 }, { "calibration/aurc": 0.49606457835475953, "calibration/batch_distribution_entropy": 0.9855216279436447, "calibration/buffer_distribution_entropy": 0.9546393763284033, "calibration/confidence_entropy": 0.5309623955093902, "calibration/coverage@0%": 0.002379660992144125, "calibration/coverage@1%": 0.002379660992144125, "calibration/coverage@10%": 0.002379660992144125, "calibration/coverage@15%": 0.00553547953257805, "calibration/coverage@20%": 0.00553547953257805, "calibration/coverage@25%": 0.018316860835618055, "calibration/coverage@30%": 0.01950661707393295, "calibration/coverage@5%": 0.002379660992144125, "calibration/ece": 0.17130343214695962, "calibration/mean_confidence": 0.5091154509440758, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00654296875, "completions/max_length": 1147.4, "completions/max_terminated_length": 1147.4, "completions/mean_length": 129.88291015625, "completions/mean_terminated_length": 130.73819885253906, "completions/min_length": 0.0, "completions/min_terminated_length": 11.4, "epoch": 0.16, "grad_norm": 0.01775994896888733, "learning_rate": 1e-06, "loss": -0.0066, "num_tokens": 164576884.0, "reward": 0.8649451255798339, "reward_std": 0.15237079560756683, "rewards/accgated_coverage_0": 0.013734531402587891, "rewards/accgated_coverage_1": 0.013734531402587891, "rewards/accgated_coverage_10": 0.013734531402587891, "rewards/accgated_coverage_15": 0.013734531402587891, "rewards/accgated_coverage_20": 0.013734531402587891, "rewards/accgated_coverage_25": 0.013734531402587891, "rewards/accgated_coverage_5": 0.013734531402587891, "rewards/accuracy_reward": 0.42509765625, "rewards/brier_reward": 0.7105550646781922, "rewards/confidence_uniqueness_reward": 0.9447488427162171, "rewards/format_reward": 0.98974609375, "rewards/frontier_aurc_reward": -0.004470669943839311, "rewards/frontier_ece_reward": 0.0029585707816295326, "rewards/frontier_entropy_batch_reward": -0.17861297130584716, "signal/accgated_coverage_0/centered_abs_mean": 0.04411946162581444, "signal/accgated_coverage_0/group_std_mean": 0.056043070554733274, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_1/centered_abs_mean": 0.04411946162581444, "signal/accgated_coverage_1/group_std_mean": 0.056043070554733274, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_10/centered_abs_mean": 0.04411946162581444, "signal/accgated_coverage_10/group_std_mean": 0.056043070554733274, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_15/centered_abs_mean": 0.04411946162581444, "signal/accgated_coverage_15/group_std_mean": 0.056043070554733274, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_20/centered_abs_mean": 0.04411946162581444, "signal/accgated_coverage_20/group_std_mean": 0.056043070554733274, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_25/centered_abs_mean": 0.04411946162581444, "signal/accgated_coverage_25/group_std_mean": 0.056043070554733274, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_5/centered_abs_mean": 0.04411946162581444, "signal/accgated_coverage_5/group_std_mean": 0.056043070554733274, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004411946143954992, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004411946143954992, "signal/accuracy_reward/centered_abs_mean": 0.170623779296875, "signal/accuracy_reward/group_std_mean": 0.21849047839641572, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0853118896484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0853118896484375, "signal/advantage_abs_mean": 0.1180063620209694, "signal/advantage_pre_scale_abs_mean": 0.1180063620209694, "signal/advantage_pre_scale_std": 0.1680304616689682, "signal/advantage_std": 0.1680304616689682, "signal/brier_reward/centered_abs_mean": 0.2101664960384369, "signal/brier_reward/group_std_mean": 0.2581899106502533, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02101665027439594, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02101665027439594, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027367017790675165, "signal/confidence_uniqueness_reward/group_std_mean": 0.04632028862833977, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027367019560188056, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027367019560188056, "signal/format_reward/centered_abs_mean": 0.017938232421875, "signal/format_reward/group_std_mean": 0.035063137859106065, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0089691162109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0089691162109375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027033270802348853, "signal/frontier_aurc_reward/group_std_mean": 0.003944651270285249, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.379158952157013e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.379158952157013e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.06154962629079819, "signal/frontier_ece_reward/group_std_mean": 0.08616945594549179, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0061549627222120765, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0061549627222120765, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2729690343141556, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35792847275733947, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02729690447449684, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02729690447449684, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.6405628666985336, "eval_calibration/batch_distribution_entropy": 0.9033017844710131, "eval_calibration/buffer_distribution_entropy": 0.9602131464411764, "eval_calibration/confidence_entropy": 0.5389164854891668, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.03125, "eval_calibration/coverage@15%": 0.03125, "eval_calibration/coverage@20%": 0.0390625, "eval_calibration/coverage@25%": 0.0703125, "eval_calibration/coverage@30%": 0.0859375, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.3262753593432853, "eval_calibration/mean_confidence": 0.5574982158874487, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 405.25, "eval_completions/max_terminated_length": 405.25, "eval_completions/mean_length": 138.56182479858398, "eval_completions/mean_terminated_length": 138.84318161010742, "eval_completions/min_length": 51.0, "eval_completions/min_terminated_length": 66.75, "eval_loss": 0.0, "eval_num_tokens": 164576884.0, "eval_reward": 0.7559798359870911, "eval_reward_std": 0.23514112457633018, "eval_rewards/accgated_coverage_0": 0.013314789393916726, "eval_rewards/accgated_coverage_1": 0.013314789393916726, "eval_rewards/accgated_coverage_10": 0.013314789393916726, "eval_rewards/accgated_coverage_15": 0.013314789393916726, "eval_rewards/accgated_coverage_20": 0.013314789393916726, "eval_rewards/accgated_coverage_25": 0.013314789393916726, "eval_rewards/accgated_coverage_5": 0.013314789393916726, "eval_rewards/accuracy_reward": 0.376953125, "eval_rewards/brier_reward": 0.6998499184846878, "eval_rewards/confidence_uniqueness_reward": 0.8947123885154724, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.0051660287426784635, "eval_rewards/frontier_ece_reward": -0.004275021725334227, "eval_rewards/frontier_entropy_batch_reward": -0.998046875, "eval_runtime": 28.9215, "eval_samples_per_second": 17.288, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.03896623570472002, "eval_signal/accgated_coverage_0/group_std_mean": 0.052276285365223885, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.03896623570472002, "eval_signal/accgated_coverage_1/group_std_mean": 0.052276285365223885, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.03896623570472002, "eval_signal/accgated_coverage_10/group_std_mean": 0.052276285365223885, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.03896623570472002, "eval_signal/accgated_coverage_15/group_std_mean": 0.052276285365223885, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.03896623570472002, "eval_signal/accgated_coverage_20/group_std_mean": 0.052276285365223885, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.03896623570472002, "eval_signal/accgated_coverage_25/group_std_mean": 0.052276285365223885, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.03896623570472002, "eval_signal/accgated_coverage_5/group_std_mean": 0.052276285365223885, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00389662355883047, "eval_signal/accuracy_reward/centered_abs_mean": 0.4581298828125, "eval_signal/accuracy_reward/group_std_mean": 0.48544733971357346, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22906494140625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22906494140625, "eval_signal/advantage_abs_mean": 0.21391661465168, "eval_signal/advantage_pre_scale_abs_mean": 0.21391661465168, "eval_signal/advantage_pre_scale_std": 0.23284973949193954, "eval_signal/advantage_std": 0.23284973949193954, "eval_signal/brier_reward/centered_abs_mean": 0.2159881703555584, "eval_signal/brier_reward/group_std_mean": 0.2689853310585022, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0215988177806139, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0215988177806139, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04416041262447834, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05725146550685167, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004416041250806302, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004416041250806302, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0039943401352502406, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005418717511929572, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.992925369151635e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.992925369151635e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.06499312072992325, "eval_signal/frontier_ece_reward/group_std_mean": 0.0963602364063263, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006499312003143132, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006499312003143132, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0037841796875, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.011048543266952038, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9375, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0003784179862122983, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0003784179862122983, "eval_steps_per_second": 0.138, "step": 50 }, { "calibration/aurc": 0.4754503249339259, "calibration/batch_distribution_entropy": 0.9860094280106372, "calibration/buffer_distribution_entropy": 0.9636066838694577, "calibration/confidence_entropy": 0.5166222641614133, "calibration/coverage@0%": 0.003930101199956869, "calibration/coverage@1%": 0.003930101199956869, "calibration/coverage@10%": 0.003930101199956869, "calibration/coverage@15%": 0.003930101199956869, "calibration/coverage@20%": 0.004715955816852743, "calibration/coverage@25%": 0.01453847381957539, "calibration/coverage@30%": 0.015719576181780114, "calibration/coverage@5%": 0.003930101199956869, "calibration/ece": 0.19332276668528464, "calibration/mean_confidence": 0.5447290779242254, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00283203125, "completions/max_length": 956.6, "completions/max_terminated_length": 956.6, "completions/mean_length": 142.855859375, "completions/mean_terminated_length": 143.2583770751953, "completions/min_length": 0.0, "completions/min_terminated_length": 51.2, "epoch": 0.176, "grad_norm": 0.026265909895300865, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 181276848.0, "reward": 0.8707459092140197, "reward_std": 0.1425304591655731, "rewards/accgated_coverage_0": 0.013036295026540756, "rewards/accgated_coverage_1": 0.013036295026540756, "rewards/accgated_coverage_10": 0.013036295026540756, "rewards/accgated_coverage_15": 0.013036295026540756, "rewards/accgated_coverage_20": 0.013036295026540756, "rewards/accgated_coverage_25": 0.013036295026540756, "rewards/accgated_coverage_5": 0.013036295026540756, "rewards/accuracy_reward": 0.4294921875, "rewards/brier_reward": 0.7150080680847168, "rewards/confidence_uniqueness_reward": 0.9512990832328796, "rewards/format_reward": 0.9962890625, "rewards/frontier_aurc_reward": -0.004525785241276026, "rewards/frontier_ece_reward": 0.0045379682444036005, "rewards/frontier_entropy_batch_reward": -0.18298066556453704, "signal/accgated_coverage_0/centered_abs_mean": 0.04418762475252151, "signal/accgated_coverage_0/group_std_mean": 0.05718696340918541, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_1/centered_abs_mean": 0.04418762475252151, "signal/accgated_coverage_1/group_std_mean": 0.05718696340918541, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_10/centered_abs_mean": 0.04418762475252151, "signal/accgated_coverage_10/group_std_mean": 0.05718696340918541, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_15/centered_abs_mean": 0.04418762475252151, "signal/accgated_coverage_15/group_std_mean": 0.05718696340918541, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_20/centered_abs_mean": 0.04418762475252151, "signal/accgated_coverage_20/group_std_mean": 0.05718696340918541, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_25/centered_abs_mean": 0.04418762475252151, "signal/accgated_coverage_25/group_std_mean": 0.05718696340918541, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_5/centered_abs_mean": 0.04418762475252151, "signal/accgated_coverage_5/group_std_mean": 0.05718696340918541, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004418762493878603, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004418762493878603, "signal/accuracy_reward/centered_abs_mean": 0.161376953125, "signal/accuracy_reward/group_std_mean": 0.20732997953891755, "signal/accuracy_reward/group_zero_std_frac": 0.43125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0806884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0806884765625, "signal/advantage_abs_mean": 0.11180974841117859, "signal/advantage_pre_scale_abs_mean": 0.11180974841117859, "signal/advantage_pre_scale_std": 0.15546224117279053, "signal/advantage_std": 0.15546224117279053, "signal/brier_reward/centered_abs_mean": 0.20797090530395507, "signal/brier_reward/group_std_mean": 0.25753060579299925, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020797090604901314, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020797090604901314, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017350507155060767, "signal/confidence_uniqueness_reward/group_std_mean": 0.030293600633740425, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017350507900118829, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017350507900118829, "signal/format_reward/centered_abs_mean": 0.00703125, "signal/format_reward/group_std_mean": 0.017755493894219397, "signal/format_reward/group_zero_std_frac": 0.909375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029423195403069256, "signal/frontier_aurc_reward/group_std_mean": 0.004191439598798752, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6778994399355724e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6778994399355724e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.062749931961298, "signal/frontier_ece_reward/group_std_mean": 0.08544526249170303, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006274993345141411, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006274993345141411, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2763451874256134, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35692101120948794, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02763451896607876, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02763451896607876, "step": 55 }, { "calibration/aurc": 0.40837566973095046, "calibration/batch_distribution_entropy": 0.9885787564340249, "calibration/buffer_distribution_entropy": 0.9697773388363558, "calibration/confidence_entropy": 0.48658811017166015, "calibration/coverage@0%": 0.002740502450980392, "calibration/coverage@1%": 0.002740502450980392, "calibration/coverage@10%": 0.006669775535459763, "calibration/coverage@15%": 0.008622900535459763, "calibration/coverage@20%": 0.025432818940444545, "calibration/coverage@25%": 0.047359947417080786, "calibration/coverage@30%": 0.17184148862148002, "calibration/coverage@5%": 0.002740502450980392, "calibration/ece": 0.15979824998045, "calibration/mean_confidence": 0.4947491103908284, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021484375, "completions/max_length": 1097.0, "completions/max_terminated_length": 1097.0, "completions/mean_length": 153.2072265625, "completions/mean_terminated_length": 153.53602600097656, "completions/min_length": 0.0, "completions/min_terminated_length": 38.6, "epoch": 0.192, "grad_norm": 0.06689594686031342, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 197660506.0, "reward": 0.8970973372459412, "reward_std": 0.13292600810527802, "rewards/accgated_coverage_0": 0.016102191619575025, "rewards/accgated_coverage_1": 0.016102191619575025, "rewards/accgated_coverage_10": 0.016102191619575025, "rewards/accgated_coverage_15": 0.016102191619575025, "rewards/accgated_coverage_20": 0.016102191619575025, "rewards/accgated_coverage_25": 0.016102191619575025, "rewards/accgated_coverage_5": 0.016102191619575025, "rewards/accuracy_reward": 0.46796875, "rewards/brier_reward": 0.726796281337738, "rewards/confidence_uniqueness_reward": 0.9531630277633667, "rewards/format_reward": 0.99716796875, "rewards/frontier_aurc_reward": -0.00395333026535809, "rewards/frontier_ece_reward": 0.012035092897713184, "rewards/frontier_entropy_batch_reward": -0.15892549753189086, "signal/accgated_coverage_0/centered_abs_mean": 0.05685779377818108, "signal/accgated_coverage_0/group_std_mean": 0.07282028794288635, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_1/centered_abs_mean": 0.05685779377818108, "signal/accgated_coverage_1/group_std_mean": 0.07282028794288635, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_10/centered_abs_mean": 0.05685779377818108, "signal/accgated_coverage_10/group_std_mean": 0.07282028794288635, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_15/centered_abs_mean": 0.05685779377818108, "signal/accgated_coverage_15/group_std_mean": 0.07282028794288635, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_20/centered_abs_mean": 0.05685779377818108, "signal/accgated_coverage_20/group_std_mean": 0.07282028794288635, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_25/centered_abs_mean": 0.05685779377818108, "signal/accgated_coverage_25/group_std_mean": 0.07282028794288635, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_5/centered_abs_mean": 0.05685779377818108, "signal/accgated_coverage_5/group_std_mean": 0.07282028794288635, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005685779452323914, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005685779452323914, "signal/accuracy_reward/centered_abs_mean": 0.1550048828125, "signal/accuracy_reward/group_std_mean": 0.20295966863632203, "signal/accuracy_reward/group_zero_std_frac": 0.428125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07750244140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07750244140625, "signal/advantage_abs_mean": 0.10285976082086563, "signal/advantage_pre_scale_abs_mean": 0.10285976082086563, "signal/advantage_pre_scale_std": 0.1445027083158493, "signal/advantage_std": 0.1445027083158493, "signal/brier_reward/centered_abs_mean": 0.21229986250400543, "signal/brier_reward/group_std_mean": 0.2616199791431427, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122998610138893, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02122998610138893, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01506846398115158, "signal/confidence_uniqueness_reward/group_std_mean": 0.025943630561232566, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015068464446812869, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015068464446812869, "signal/format_reward/centered_abs_mean": 0.005401611328125, "signal/format_reward/group_std_mean": 0.01394332442432642, "signal/format_reward/group_zero_std_frac": 0.928125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027008056640625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0027008056640625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002814545203000307, "signal/frontier_aurc_reward/group_std_mean": 0.004039418138563633, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.51818154740613e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.51818154740613e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05990893542766571, "signal/frontier_ece_reward/group_std_mean": 0.08055989742279053, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005990893673151731, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005990893673151731, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24653230607509613, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3289069652557373, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024653231725096703, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024653231725096703, "step": 60 }, { "calibration/aurc": 0.34297082237054877, "calibration/batch_distribution_entropy": 0.98456109382964, "calibration/buffer_distribution_entropy": 0.9742222514195511, "calibration/confidence_entropy": 0.4718204105868005, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.07265625, "calibration/coverage@15%": 0.201953125, "calibration/coverage@20%": 0.29921875, "calibration/coverage@25%": 0.384375, "calibration/coverage@30%": 0.484765625, "calibration/coverage@5%": 0.00703125, "calibration/ece": 0.20060203447078181, "calibration/mean_confidence": 0.49395130888201433, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 634.4, "completions/max_terminated_length": 634.4, "completions/mean_length": 161.644921875, "completions/mean_terminated_length": 161.86683044433593, "completions/min_length": 0.0, "completions/min_terminated_length": 52.6, "epoch": 0.208, "grad_norm": 0.03316055238246918, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 214347974.0, "reward": 0.9171293497085571, "reward_std": 0.1312678873538971, "rewards/accgated_coverage_0": 0.01277830610051751, "rewards/accgated_coverage_1": 0.01277830610051751, "rewards/accgated_coverage_10": 0.01277830610051751, "rewards/accgated_coverage_15": 0.01277830610051751, "rewards/accgated_coverage_20": 0.01277830610051751, "rewards/accgated_coverage_25": 0.01277830610051751, "rewards/accgated_coverage_5": 0.01277830610051751, "rewards/accuracy_reward": 0.51337890625, "rewards/brier_reward": 0.7331403970718384, "rewards/confidence_uniqueness_reward": 0.953273355960846, "rewards/format_reward": 0.9982421875, "rewards/frontier_aurc_reward": -0.0034087498672306536, "rewards/frontier_ece_reward": 0.016286897659301757, "rewards/frontier_entropy_batch_reward": -0.17853465378284455, "signal/accgated_coverage_0/centered_abs_mean": 0.07022299095988274, "signal/accgated_coverage_0/group_std_mean": 0.08959027081727981, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_1/centered_abs_mean": 0.07022299095988274, "signal/accgated_coverage_1/group_std_mean": 0.08959027081727981, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_10/centered_abs_mean": 0.07022299095988274, "signal/accgated_coverage_10/group_std_mean": 0.08959027081727981, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_15/centered_abs_mean": 0.07022299095988274, "signal/accgated_coverage_15/group_std_mean": 0.08959027081727981, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_20/centered_abs_mean": 0.07022299095988274, "signal/accgated_coverage_20/group_std_mean": 0.08959027081727981, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_25/centered_abs_mean": 0.07022299095988274, "signal/accgated_coverage_25/group_std_mean": 0.08959027081727981, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_5/centered_abs_mean": 0.07022299095988274, "signal/accgated_coverage_5/group_std_mean": 0.08959027081727981, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007022299244999886, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007022299244999886, "signal/accuracy_reward/centered_abs_mean": 0.148431396484375, "signal/accuracy_reward/group_std_mean": 0.1960848778486252, "signal/accuracy_reward/group_zero_std_frac": 0.446875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0742156982421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0742156982421875, "signal/advantage_abs_mean": 0.10217523276805877, "signal/advantage_pre_scale_abs_mean": 0.10217523276805877, "signal/advantage_pre_scale_std": 0.14075265526771547, "signal/advantage_std": 0.14075265526771547, "signal/brier_reward/centered_abs_mean": 0.21132160127162933, "signal/brier_reward/group_std_mean": 0.26202887296676636, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021132160723209382, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021132160723209382, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014979423955082893, "signal/confidence_uniqueness_reward/group_std_mean": 0.023213838413357734, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014979424653574825, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014979424653574825, "signal/format_reward/centered_abs_mean": 0.0033447265625, "signal/format_reward/group_std_mean": 0.008539242530241608, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00167236328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00167236328125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002694142144173384, "signal/frontier_aurc_reward/group_std_mean": 0.0039808189030736685, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.36767770932056e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.36767770932056e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.055422401428222655, "signal/frontier_ece_reward/group_std_mean": 0.0753881111741066, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005542240105569363, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005542240105569363, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26786054074764254, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34874598383903505, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02678605616092682, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02678605616092682, "step": 65 }, { "calibration/aurc": 0.35186780951983854, "calibration/batch_distribution_entropy": 0.9845853007050364, "calibration/buffer_distribution_entropy": 0.9778449012308194, "calibration/confidence_entropy": 0.464560058342187, "calibration/coverage@0%": 0.003125764432485323, "calibration/coverage@1%": 0.003125764432485323, "calibration/coverage@10%": 0.003125764432485323, "calibration/coverage@15%": 0.02970661081213307, "calibration/coverage@20%": 0.18998593444227005, "calibration/coverage@25%": 0.3162434258806262, "calibration/coverage@30%": 0.4678510273972603, "calibration/coverage@5%": 0.003125764432485323, "calibration/ece": 0.16684519307308002, "calibration/mean_confidence": 0.46676442179175454, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 1037.8, "completions/max_terminated_length": 1037.8, "completions/mean_length": 166.5966796875, "completions/mean_terminated_length": 166.72836303710938, "completions/min_length": 11.4, "completions/min_terminated_length": 61.4, "epoch": 0.224, "grad_norm": 0.019673509523272514, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 231207108.0, "reward": 0.8996399760246276, "reward_std": 0.12111974656581878, "rewards/accgated_coverage_0": 0.02115403041243553, "rewards/accgated_coverage_1": 0.02115403041243553, "rewards/accgated_coverage_10": 0.02115403041243553, "rewards/accgated_coverage_15": 0.02115403041243553, "rewards/accgated_coverage_20": 0.02115403041243553, "rewards/accgated_coverage_25": 0.02115403041243553, "rewards/accgated_coverage_5": 0.02115403041243553, "rewards/accuracy_reward": 0.46669921875, "rewards/brier_reward": 0.7538125157356262, "rewards/confidence_uniqueness_reward": 0.9529985308647155, "rewards/format_reward": 0.99873046875, "rewards/frontier_aurc_reward": -0.0034322818275541065, "rewards/frontier_ece_reward": 0.01811833530664444, "rewards/frontier_entropy_batch_reward": -0.20332725048065187, "signal/accgated_coverage_0/centered_abs_mean": 0.05779874622821808, "signal/accgated_coverage_0/group_std_mean": 0.07419940680265427, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_1/centered_abs_mean": 0.05779874622821808, "signal/accgated_coverage_1/group_std_mean": 0.07419940680265427, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_10/centered_abs_mean": 0.05779874622821808, "signal/accgated_coverage_10/group_std_mean": 0.07419940680265427, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_15/centered_abs_mean": 0.05779874622821808, "signal/accgated_coverage_15/group_std_mean": 0.07419940680265427, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_20/centered_abs_mean": 0.05779874622821808, "signal/accgated_coverage_20/group_std_mean": 0.07419940680265427, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_25/centered_abs_mean": 0.05779874622821808, "signal/accgated_coverage_25/group_std_mean": 0.07419940680265427, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_5/centered_abs_mean": 0.05779874622821808, "signal/accgated_coverage_5/group_std_mean": 0.07419940680265427, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005779874604195356, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005779874604195356, "signal/accuracy_reward/centered_abs_mean": 0.135028076171875, "signal/accuracy_reward/group_std_mean": 0.17842960655689238, "signal/accuracy_reward/group_zero_std_frac": 0.490625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0675140380859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0675140380859375, "signal/advantage_abs_mean": 0.09368720501661301, "signal/advantage_pre_scale_abs_mean": 0.09368720501661301, "signal/advantage_pre_scale_std": 0.13311106264591216, "signal/advantage_std": 0.13311106264591216, "signal/brier_reward/centered_abs_mean": 0.1977373868227005, "signal/brier_reward/group_std_mean": 0.2476351499557495, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019773739948868753, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019773739948868753, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01538306288421154, "signal/confidence_uniqueness_reward/group_std_mean": 0.023301176354289056, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015383063117042183, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015383063117042183, "signal/format_reward/centered_abs_mean": 0.002459716796875, "signal/format_reward/group_std_mean": 0.007181553076952696, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026450737379491327, "signal/frontier_aurc_reward/group_std_mean": 0.003880691761150956, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3063420414691794e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3063420414691794e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0504297137260437, "signal/frontier_ece_reward/group_std_mean": 0.06822670623660088, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050429713912308214, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050429713912308214, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2879321575164795, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36692259907722474, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028793216869235038, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028793216869235038, "step": 70 }, { "calibration/aurc": 0.398296738207578, "calibration/batch_distribution_entropy": 0.9787268880157762, "calibration/buffer_distribution_entropy": 0.980509843864418, "calibration/confidence_entropy": 0.4779615453676459, "calibration/coverage@0%": 0.0050804182974559685, "calibration/coverage@1%": 0.0050804182974559685, "calibration/coverage@10%": 0.021517245596868885, "calibration/coverage@15%": 0.1330594116927593, "calibration/coverage@20%": 0.17375703277886498, "calibration/coverage@25%": 0.22266542318982388, "calibration/coverage@30%": 0.2637269141389432, "calibration/coverage@5%": 0.0050804182974559685, "calibration/ece": 0.19204764250205275, "calibration/mean_confidence": 0.5236280753745388, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 715.8, "completions/max_terminated_length": 715.8, "completions/mean_length": 173.01669921875, "completions/mean_terminated_length": 173.11717834472657, "completions/min_length": 28.6, "completions/min_terminated_length": 63.0, "epoch": 0.24, "grad_norm": 0.016353704035282135, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 248230479.0, "reward": 0.9228451728820801, "reward_std": 0.13029859066009522, "rewards/accgated_coverage_0": 0.014048190228641034, "rewards/accgated_coverage_1": 0.014048190228641034, "rewards/accgated_coverage_10": 0.014048190228641034, "rewards/accgated_coverage_15": 0.014048190228641034, "rewards/accgated_coverage_20": 0.014048190228641034, "rewards/accgated_coverage_25": 0.014048190228641034, "rewards/accgated_coverage_5": 0.014048190228641034, "rewards/accuracy_reward": 0.52412109375, "rewards/brier_reward": 0.7445278406143189, "rewards/confidence_uniqueness_reward": 0.9542921662330628, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.0032572926487773658, "rewards/frontier_ece_reward": 0.018665025755763055, "rewards/frontier_entropy_batch_reward": -0.2017095595598221, "signal/accgated_coverage_0/centered_abs_mean": 0.06603854522109032, "signal/accgated_coverage_0/group_std_mean": 0.08608146607875825, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_1/centered_abs_mean": 0.06603854522109032, "signal/accgated_coverage_1/group_std_mean": 0.08608146607875825, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_10/centered_abs_mean": 0.06603854522109032, "signal/accgated_coverage_10/group_std_mean": 0.08608146607875825, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_15/centered_abs_mean": 0.06603854522109032, "signal/accgated_coverage_15/group_std_mean": 0.08608146607875825, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_20/centered_abs_mean": 0.06603854522109032, "signal/accgated_coverage_20/group_std_mean": 0.08608146607875825, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_25/centered_abs_mean": 0.06603854522109032, "signal/accgated_coverage_25/group_std_mean": 0.08608146607875825, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_5/centered_abs_mean": 0.06603854522109032, "signal/accgated_coverage_5/group_std_mean": 0.08608146607875825, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0066038545221090315, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0066038545221090315, "signal/accuracy_reward/centered_abs_mean": 0.153692626953125, "signal/accuracy_reward/group_std_mean": 0.20447224378585815, "signal/accuracy_reward/group_zero_std_frac": 0.409375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0768463134765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0768463134765625, "signal/advantage_abs_mean": 0.10250550508499146, "signal/advantage_pre_scale_abs_mean": 0.10250550508499146, "signal/advantage_pre_scale_std": 0.14105989634990693, "signal/advantage_std": 0.14105989634990693, "signal/brier_reward/centered_abs_mean": 0.20125386118888855, "signal/brier_reward/group_std_mean": 0.250895568728447, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020125385373830795, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020125385373830795, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014721071161329747, "signal/confidence_uniqueness_reward/group_std_mean": 0.022287074476480484, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014721071347594261, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014721071347594261, "signal/format_reward/centered_abs_mean": 0.0022705078125, "signal/format_reward/group_std_mean": 0.006629125913605094, "signal/format_reward/group_zero_std_frac": 0.9625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00113525390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00113525390625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028927187900990247, "signal/frontier_aurc_reward/group_std_mean": 0.0041770459152758125, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.615898676798679e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.615898676798679e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.05018571987748146, "signal/frontier_ece_reward/group_std_mean": 0.06835311651229858, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005018572043627501, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005018572043627501, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28730441331863404, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3638529360294342, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028730442002415656, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028730442002415656, "step": 75 }, { "calibration/aurc": 0.33968669272836216, "calibration/batch_distribution_entropy": 0.9772573735760852, "calibration/buffer_distribution_entropy": 0.9819020986090996, "calibration/confidence_entropy": 0.4644303426340458, "calibration/coverage@0%": 0.003520220588235294, "calibration/coverage@1%": 0.003520220588235294, "calibration/coverage@10%": 0.0948468137254902, "calibration/coverage@15%": 0.18452114150454704, "calibration/coverage@20%": 0.28779996630501514, "calibration/coverage@25%": 0.38054726770845326, "calibration/coverage@30%": 0.5279652156239207, "calibration/coverage@5%": 0.050554534313725495, "calibration/ece": 0.12862788260104924, "calibration/mean_confidence": 0.5174480799208958, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 791.6, "completions/max_terminated_length": 791.6, "completions/mean_length": 172.6880859375, "completions/mean_terminated_length": 172.79131469726562, "completions/min_length": 37.6, "completions/min_terminated_length": 66.6, "epoch": 0.256, "grad_norm": 0.008825325407087803, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 265053621.0, "reward": 0.9096681714057923, "reward_std": 0.12087092399597169, "rewards/accgated_coverage_0": 0.019006002135574816, "rewards/accgated_coverage_1": 0.019006002135574816, "rewards/accgated_coverage_10": 0.019006002135574816, "rewards/accgated_coverage_15": 0.019006002135574816, "rewards/accgated_coverage_20": 0.019006002135574816, "rewards/accgated_coverage_25": 0.019006002135574816, "rewards/accgated_coverage_5": 0.019006002135574816, "rewards/accuracy_reward": 0.490234375, "rewards/brier_reward": 0.7554831981658936, "rewards/confidence_uniqueness_reward": 0.9535634636878967, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.00338795306161046, "rewards/frontier_ece_reward": 0.018822862207889555, "rewards/frontier_entropy_batch_reward": -0.21009528636932373, "signal/accgated_coverage_0/centered_abs_mean": 0.05554577559232712, "signal/accgated_coverage_0/group_std_mean": 0.07183501571416855, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_1/centered_abs_mean": 0.05554577559232712, "signal/accgated_coverage_1/group_std_mean": 0.07183501571416855, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_10/centered_abs_mean": 0.05554577559232712, "signal/accgated_coverage_10/group_std_mean": 0.07183501571416855, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_15/centered_abs_mean": 0.05554577559232712, "signal/accgated_coverage_15/group_std_mean": 0.07183501571416855, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_20/centered_abs_mean": 0.05554577559232712, "signal/accgated_coverage_20/group_std_mean": 0.07183501571416855, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_25/centered_abs_mean": 0.05554577559232712, "signal/accgated_coverage_25/group_std_mean": 0.07183501571416855, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_5/centered_abs_mean": 0.05554577559232712, "signal/accgated_coverage_5/group_std_mean": 0.07183501571416855, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005554577801376581, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005554577801376581, "signal/accuracy_reward/centered_abs_mean": 0.14427490234375, "signal/accuracy_reward/group_std_mean": 0.18380638659000398, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.072137451171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.072137451171875, "signal/advantage_abs_mean": 0.0953369528055191, "signal/advantage_pre_scale_abs_mean": 0.0953369528055191, "signal/advantage_pre_scale_std": 0.13637956976890564, "signal/advantage_std": 0.13637956976890564, "signal/brier_reward/centered_abs_mean": 0.1861722558736801, "signal/brier_reward/group_std_mean": 0.23275550901889802, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018617226183414458, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018617226183414458, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01430168692022562, "signal/confidence_uniqueness_reward/group_std_mean": 0.019617033004760743, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014301687711849808, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014301687711849808, "signal/format_reward/centered_abs_mean": 0.00177001953125, "signal/format_reward/group_std_mean": 0.003914954606443644, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000885009765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000885009765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028289766516536472, "signal/frontier_aurc_reward/group_std_mean": 0.004063015244901181, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.536220756359398e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.536220756359398e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.04593289494514465, "signal/frontier_ece_reward/group_std_mean": 0.063157469779253, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004593289457261562, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004593289457261562, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2936802178621292, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3710300087928772, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029368022084236146, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029368022084236146, "step": 80 }, { "calibration/aurc": 0.4089425308725524, "calibration/batch_distribution_entropy": 0.9837127563451269, "calibration/buffer_distribution_entropy": 0.9834328797869645, "calibration/confidence_entropy": 0.4876595541216691, "calibration/coverage@0%": 0.007831631796554238, "calibration/coverage@1%": 0.007831631796554238, "calibration/coverage@10%": 0.00939718952649553, "calibration/coverage@15%": 0.01213845041441234, "calibration/coverage@20%": 0.06687872323586969, "calibration/coverage@25%": 0.2221628343118069, "calibration/coverage@30%": 0.3183875091132343, "calibration/coverage@5%": 0.007831631796554238, "calibration/ece": 0.1605411700914821, "calibration/mean_confidence": 0.5151936674590009, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 509.0, "completions/max_terminated_length": 509.0, "completions/mean_length": 179.36767578125, "completions/mean_terminated_length": 179.49051818847656, "completions/min_length": 13.6, "completions/min_terminated_length": 72.6, "epoch": 0.272, "grad_norm": 0.006936948746442795, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 281856042.0, "reward": 0.9139393091201782, "reward_std": 0.1179785043001175, "rewards/accgated_coverage_0": 0.021143794246017932, "rewards/accgated_coverage_1": 0.021143794246017932, "rewards/accgated_coverage_10": 0.021143794246017932, "rewards/accgated_coverage_15": 0.021143794246017932, "rewards/accgated_coverage_20": 0.021143794246017932, "rewards/accgated_coverage_25": 0.021143794246017932, "rewards/accgated_coverage_5": 0.021143794246017932, "rewards/accuracy_reward": 0.4890625, "rewards/brier_reward": 0.7583757877349854, "rewards/confidence_uniqueness_reward": 0.9539362549781799, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0031724351458251475, "rewards/frontier_ece_reward": 0.016499579697847367, "rewards/frontier_entropy_batch_reward": -0.17843463122844697, "signal/accgated_coverage_0/centered_abs_mean": 0.0536054477095604, "signal/accgated_coverage_0/group_std_mean": 0.06919381469488144, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_1/centered_abs_mean": 0.0536054477095604, "signal/accgated_coverage_1/group_std_mean": 0.06919381469488144, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_10/centered_abs_mean": 0.0536054477095604, "signal/accgated_coverage_10/group_std_mean": 0.06919381469488144, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_15/centered_abs_mean": 0.0536054477095604, "signal/accgated_coverage_15/group_std_mean": 0.06919381469488144, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_20/centered_abs_mean": 0.0536054477095604, "signal/accgated_coverage_20/group_std_mean": 0.06919381469488144, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_25/centered_abs_mean": 0.0536054477095604, "signal/accgated_coverage_25/group_std_mean": 0.06919381469488144, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_5/centered_abs_mean": 0.0536054477095604, "signal/accgated_coverage_5/group_std_mean": 0.06919381469488144, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0053605446591973305, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0053605446591973305, "signal/accuracy_reward/centered_abs_mean": 0.1404296875, "signal/accuracy_reward/group_std_mean": 0.17963421940803528, "signal/accuracy_reward/group_zero_std_frac": 0.5, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07021484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07021484375, "signal/advantage_abs_mean": 0.09316650480031967, "signal/advantage_pre_scale_abs_mean": 0.09316650480031967, "signal/advantage_pre_scale_std": 0.13246660977602004, "signal/advantage_std": 0.13246660977602004, "signal/brier_reward/centered_abs_mean": 0.17635450959205629, "signal/brier_reward/group_std_mean": 0.22242403626441956, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01763545088469982, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01763545088469982, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013126727193593979, "signal/confidence_uniqueness_reward/group_std_mean": 0.01895910929888487, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013126727426424623, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013126727426424623, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024571210611611604, "signal/frontier_aurc_reward/group_std_mean": 0.0035782069433480503, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0714013701071965e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0714013701071965e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03977171406149864, "signal/frontier_ece_reward/group_std_mean": 0.055351509153842925, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0039771712385118, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0039771712385118, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2645086497068405, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34457975029945376, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026450866460800172, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026450866460800172, "step": 85 }, { "calibration/aurc": 0.3687604260891969, "calibration/batch_distribution_entropy": 0.9891861733002207, "calibration/buffer_distribution_entropy": 0.9852262916544937, "calibration/confidence_entropy": 0.49639468802077724, "calibration/coverage@0%": 0.008625955285295214, "calibration/coverage@1%": 0.008625955285295214, "calibration/coverage@10%": 0.06206406923421467, "calibration/coverage@15%": 0.08563970774109089, "calibration/coverage@20%": 0.10606885202845842, "calibration/coverage@25%": 0.1616404324651383, "calibration/coverage@30%": 0.2792983287733709, "calibration/coverage@5%": 0.037309648801994624, "calibration/ece": 0.11583087225513293, "calibration/mean_confidence": 0.5170620981196054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 737.4, "completions/max_terminated_length": 737.4, "completions/mean_length": 179.5173828125, "completions/mean_terminated_length": 179.58653564453124, "completions/min_length": 29.6, "completions/min_terminated_length": 73.2, "epoch": 0.288, "grad_norm": 0.0012688508722931147, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 298652476.0, "reward": 0.9159511685371399, "reward_std": 0.11746599227190017, "rewards/accgated_coverage_0": 0.02089243084192276, "rewards/accgated_coverage_1": 0.02089243084192276, "rewards/accgated_coverage_10": 0.02089243084192276, "rewards/accgated_coverage_15": 0.02089243084192276, "rewards/accgated_coverage_20": 0.02089243084192276, "rewards/accgated_coverage_25": 0.02089243084192276, "rewards/accgated_coverage_5": 0.02089243084192276, "rewards/accuracy_reward": 0.49248046875, "rewards/brier_reward": 0.7615193486213684, "rewards/confidence_uniqueness_reward": 0.9535805463790894, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.003121078945696354, "rewards/frontier_ece_reward": 0.01654744055122137, "rewards/frontier_entropy_batch_reward": -0.1755122125148773, "signal/accgated_coverage_0/centered_abs_mean": 0.055193740874528885, "signal/accgated_coverage_0/group_std_mean": 0.0715998388826847, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_1/centered_abs_mean": 0.055193740874528885, "signal/accgated_coverage_1/group_std_mean": 0.0715998388826847, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_10/centered_abs_mean": 0.055193740874528885, "signal/accgated_coverage_10/group_std_mean": 0.0715998388826847, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_15/centered_abs_mean": 0.055193740874528885, "signal/accgated_coverage_15/group_std_mean": 0.0715998388826847, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_20/centered_abs_mean": 0.055193740874528885, "signal/accgated_coverage_20/group_std_mean": 0.0715998388826847, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_25/centered_abs_mean": 0.055193740874528885, "signal/accgated_coverage_25/group_std_mean": 0.0715998388826847, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_5/centered_abs_mean": 0.055193740874528885, "signal/accgated_coverage_5/group_std_mean": 0.0715998388826847, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005519374087452888, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005519374087452888, "signal/accuracy_reward/centered_abs_mean": 0.144854736328125, "signal/accuracy_reward/group_std_mean": 0.19162459969520568, "signal/accuracy_reward/group_zero_std_frac": 0.45625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0724273681640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0724273681640625, "signal/advantage_abs_mean": 0.09115136116743087, "signal/advantage_pre_scale_abs_mean": 0.09115136116743087, "signal/advantage_pre_scale_std": 0.13110833764076232, "signal/advantage_std": 0.13110833764076232, "signal/brier_reward/centered_abs_mean": 0.1735696941614151, "signal/brier_reward/group_std_mean": 0.22003813683986664, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01735696941614151, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01735696941614151, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013291171565651894, "signal/confidence_uniqueness_reward/group_std_mean": 0.01954982727766037, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001329117128625512, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001329117128625512, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.0055242716800421475, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002382648875936866, "signal/frontier_aurc_reward/group_std_mean": 0.0035583035554736854, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9783111676806585e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9783111676806585e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03827905729413032, "signal/frontier_ece_reward/group_std_mean": 0.05356697663664818, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003827905748039484, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003827905748039484, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2567889988422394, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3347454309463501, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025678900256752967, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025678900256752967, "step": 90 }, { "calibration/aurc": 0.3405218322089727, "calibration/batch_distribution_entropy": 0.9821722305232516, "calibration/buffer_distribution_entropy": 0.9865708850066515, "calibration/confidence_entropy": 0.48427231850411545, "calibration/coverage@0%": 0.0015625, "calibration/coverage@1%": 0.0015625, "calibration/coverage@10%": 0.015649509803921567, "calibration/coverage@15%": 0.06487591911764705, "calibration/coverage@20%": 0.19340226715686276, "calibration/coverage@25%": 0.32831316130904414, "calibration/coverage@30%": 0.42801504163309156, "calibration/coverage@5%": 0.0015625, "calibration/ece": 0.12548018093425042, "calibration/mean_confidence": 0.5260518163695782, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 602.8, "completions/max_terminated_length": 602.8, "completions/mean_length": 183.88154296875, "completions/mean_terminated_length": 183.9718444824219, "completions/min_length": 17.4, "completions/min_terminated_length": 78.4, "epoch": 0.304, "grad_norm": 0.0011141430586576462, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 315465375.0, "reward": 0.9172628998756409, "reward_std": 0.11238628774881362, "rewards/accgated_coverage_0": 0.021862666122615337, "rewards/accgated_coverage_1": 0.021862666122615337, "rewards/accgated_coverage_10": 0.021862666122615337, "rewards/accgated_coverage_15": 0.021862666122615337, "rewards/accgated_coverage_20": 0.021862666122615337, "rewards/accgated_coverage_25": 0.021862666122615337, "rewards/accgated_coverage_5": 0.021862666122615337, "rewards/accuracy_reward": 0.49482421875, "rewards/brier_reward": 0.751349675655365, "rewards/confidence_uniqueness_reward": 0.9536804795265198, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.0032011067494750025, "rewards/frontier_ece_reward": 0.013386439438909293, "rewards/frontier_entropy_batch_reward": -0.16815277338027954, "signal/accgated_coverage_0/centered_abs_mean": 0.05449363440275192, "signal/accgated_coverage_0/group_std_mean": 0.06998619660735131, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_1/centered_abs_mean": 0.05449363440275192, "signal/accgated_coverage_1/group_std_mean": 0.06998619660735131, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_10/centered_abs_mean": 0.05449363440275192, "signal/accgated_coverage_10/group_std_mean": 0.06998619660735131, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_15/centered_abs_mean": 0.05449363440275192, "signal/accgated_coverage_15/group_std_mean": 0.06998619660735131, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_20/centered_abs_mean": 0.05449363440275192, "signal/accgated_coverage_20/group_std_mean": 0.06998619660735131, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_25/centered_abs_mean": 0.05449363440275192, "signal/accgated_coverage_25/group_std_mean": 0.06998619660735131, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_5/centered_abs_mean": 0.05449363440275192, "signal/accgated_coverage_5/group_std_mean": 0.06998619660735131, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0054493632167577745, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0054493632167577745, "signal/accuracy_reward/centered_abs_mean": 0.133612060546875, "signal/accuracy_reward/group_std_mean": 0.17452629208564757, "signal/accuracy_reward/group_zero_std_frac": 0.503125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0668060302734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0668060302734375, "signal/advantage_abs_mean": 0.08762804120779037, "signal/advantage_pre_scale_abs_mean": 0.08762804120779037, "signal/advantage_pre_scale_std": 0.12492723762989044, "signal/advantage_std": 0.12492723762989044, "signal/brier_reward/centered_abs_mean": 0.17246018946170807, "signal/brier_reward/group_std_mean": 0.2173982620239258, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01724601909518242, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01724601909518242, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013059911504387855, "signal/confidence_uniqueness_reward/group_std_mean": 0.019081205874681473, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013059912016615272, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013059912016615272, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024050343316048385, "signal/frontier_aurc_reward/group_std_mean": 0.003545998828485608, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0062928271945565e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0062928271945565e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03649954423308373, "signal/frontier_ece_reward/group_std_mean": 0.05162616893649101, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036499544978141783, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036499544978141783, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24413655698299408, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3214601457118988, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024413655698299407, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024413655698299407, "step": 95 }, { "calibration/aurc": 0.2793672778316487, "calibration/batch_distribution_entropy": 0.9858310757451532, "calibration/buffer_distribution_entropy": 0.9873953350221905, "calibration/confidence_entropy": 0.47684266949468457, "calibration/coverage@0%": 0.02150807240704501, "calibration/coverage@1%": 0.02150807240704501, "calibration/coverage@10%": 0.13804504036203522, "calibration/coverage@15%": 0.2718092588062622, "calibration/coverage@20%": 0.3977823813600783, "calibration/coverage@25%": 0.4634929977984344, "calibration/coverage@30%": 0.5448423740215265, "calibration/coverage@5%": 0.043004678326810174, "calibration/ece": 0.14391048489074354, "calibration/mean_confidence": 0.5321919492656221, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 844.6, "completions/max_terminated_length": 844.6, "completions/mean_length": 182.83623046875, "completions/mean_terminated_length": 183.01766967773438, "completions/min_length": 15.0, "completions/min_terminated_length": 83.4, "epoch": 0.32, "grad_norm": 0.0008273598505184054, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 332426322.0, "reward": 0.9257388591766358, "reward_std": 0.10454508364200592, "rewards/accgated_coverage_0": 0.01682482985779643, "rewards/accgated_coverage_1": 0.01682482985779643, "rewards/accgated_coverage_10": 0.01682482985779643, "rewards/accgated_coverage_15": 0.01682482985779643, "rewards/accgated_coverage_20": 0.01682482985779643, "rewards/accgated_coverage_25": 0.01682482985779643, "rewards/accgated_coverage_5": 0.01682482985779643, "rewards/accuracy_reward": 0.516015625, "rewards/brier_reward": 0.7683913350105286, "rewards/confidence_uniqueness_reward": 0.9535138487815857, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.0029093018732964993, "rewards/frontier_ece_reward": 0.017980808019638063, "rewards/frontier_entropy_batch_reward": -0.17510271370410918, "signal/accgated_coverage_0/centered_abs_mean": 0.055274638906121255, "signal/accgated_coverage_0/group_std_mean": 0.07174940705299378, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_1/centered_abs_mean": 0.055274638906121255, "signal/accgated_coverage_1/group_std_mean": 0.07174940705299378, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_10/centered_abs_mean": 0.055274638906121255, "signal/accgated_coverage_10/group_std_mean": 0.07174940705299378, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_15/centered_abs_mean": 0.055274638906121255, "signal/accgated_coverage_15/group_std_mean": 0.07174940705299378, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_20/centered_abs_mean": 0.055274638906121255, "signal/accgated_coverage_20/group_std_mean": 0.07174940705299378, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_25/centered_abs_mean": 0.055274638906121255, "signal/accgated_coverage_25/group_std_mean": 0.07174940705299378, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_5/centered_abs_mean": 0.055274638906121255, "signal/accgated_coverage_5/group_std_mean": 0.07174940705299378, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005527463788166642, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005527463788166642, "signal/accuracy_reward/centered_abs_mean": 0.1016845703125, "signal/accuracy_reward/group_std_mean": 0.14366158843040466, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05084228515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05084228515625, "signal/advantage_abs_mean": 0.07974396646022797, "signal/advantage_pre_scale_abs_mean": 0.07974396646022797, "signal/advantage_pre_scale_std": 0.11739609837532043, "signal/advantage_std": 0.11739609837532043, "signal/brier_reward/centered_abs_mean": 0.16091605126857758, "signal/brier_reward/group_std_mean": 0.20566837787628173, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016091605462133885, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016091605462133885, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013066734373569488, "signal/confidence_uniqueness_reward/group_std_mean": 0.018530824780464174, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001306673465296626, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001306673465296626, "signal/format_reward/centered_abs_mean": 0.00184326171875, "signal/format_reward/group_std_mean": 0.004456133488565684, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000921630859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000921630859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002345598582178354, "signal/frontier_aurc_reward/group_std_mean": 0.0034848656971007584, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9319982422748582e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9319982422748582e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.03649588227272034, "signal/frontier_ece_reward/group_std_mean": 0.05092453882098198, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036495882552117108, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036495882552117108, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2617928504943848, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34030061960220337, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02617928609251976, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02617928609251976, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.4755305499734114, "eval_calibration/batch_distribution_entropy": 0.946233855720068, "eval_calibration/buffer_distribution_entropy": 0.987774627262672, "eval_calibration/confidence_entropy": 0.49974650757893324, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.0703125, "eval_calibration/coverage@25%": 0.0703125, "eval_calibration/coverage@30%": 0.0703125, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.21127313848542387, "eval_calibration/mean_confidence": 0.47564682886743537, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 365.25, "eval_completions/max_terminated_length": 365.25, "eval_completions/mean_length": 184.06418228149414, "eval_completions/mean_terminated_length": 184.4076385498047, "eval_completions/min_length": 79.25, "eval_completions/min_terminated_length": 98.25, "eval_loss": 0.0, "eval_num_tokens": 332426322.0, "eval_reward": 0.7920490056276321, "eval_reward_std": 0.22250742837786674, "eval_rewards/accgated_coverage_0": 0.028197373263537884, "eval_rewards/accgated_coverage_1": 0.028197373263537884, "eval_rewards/accgated_coverage_10": 0.028197373263537884, "eval_rewards/accgated_coverage_15": 0.028197373263537884, "eval_rewards/accgated_coverage_20": 0.028197373263537884, "eval_rewards/accgated_coverage_25": 0.028197373263537884, "eval_rewards/accgated_coverage_5": 0.028197373263537884, "eval_rewards/accuracy_reward": 0.41015625, "eval_rewards/brier_reward": 0.7799021005630493, "eval_rewards/confidence_uniqueness_reward": 0.8919402062892914, "eval_rewards/format_reward": 0.99609375, "eval_rewards/frontier_aurc_reward": -0.0031025345670059323, "eval_rewards/frontier_ece_reward": 0.016497689532116055, "eval_rewards/frontier_entropy_batch_reward": -0.99609375, "eval_runtime": 29.4624, "eval_samples_per_second": 16.971, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.06307912431657314, "eval_signal/accgated_coverage_0/group_std_mean": 0.07854281552135944, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.06307912431657314, "eval_signal/accgated_coverage_1/group_std_mean": 0.07854281552135944, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.06307912431657314, "eval_signal/accgated_coverage_10/group_std_mean": 0.07854281552135944, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.06307912431657314, "eval_signal/accgated_coverage_15/group_std_mean": 0.07854281552135944, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.06307912431657314, "eval_signal/accgated_coverage_20/group_std_mean": 0.07854281552135944, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.06307912431657314, "eval_signal/accgated_coverage_25/group_std_mean": 0.07854281552135944, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.06307912431657314, "eval_signal/accgated_coverage_5/group_std_mean": 0.07854281552135944, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006307912524789572, "eval_signal/accuracy_reward/centered_abs_mean": 0.471435546875, "eval_signal/accuracy_reward/group_std_mean": 0.4929209053516388, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2357177734375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2357177734375, "eval_signal/advantage_abs_mean": 0.19946623593568802, "eval_signal/advantage_pre_scale_abs_mean": 0.19946623593568802, "eval_signal/advantage_pre_scale_std": 0.22014939039945602, "eval_signal/advantage_std": 0.22014939039945602, "eval_signal/brier_reward/centered_abs_mean": 0.20339667797088623, "eval_signal/brier_reward/group_std_mean": 0.254949651658535, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020339668728411198, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.020339668728411198, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045136974193155766, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06305716466158628, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0045136973494663835, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045136973494663835, "eval_signal/format_reward/centered_abs_mean": 0.007568359375, "eval_signal/format_reward/group_std_mean": 0.022097086533904076, "eval_signal/format_reward/group_zero_std_frac": 0.875, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003286067338194698, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004906978341750801, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1075841181736905e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1075841181736905e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.036860852502286434, "eval_signal/frontier_ece_reward/group_std_mean": 0.05696882400661707, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036860854597762227, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036860854597762227, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.007568359375, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022097086533904076, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.875, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007568359724245965, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0007568359724245965, "eval_steps_per_second": 0.136, "step": 100 }, { "calibration/aurc": 0.3150930502080606, "calibration/batch_distribution_entropy": 0.9801984768190843, "calibration/buffer_distribution_entropy": 0.9893199889362148, "calibration/confidence_entropy": 0.49293979634305457, "calibration/coverage@0%": 0.00352097602739726, "calibration/coverage@1%": 0.00352097602739726, "calibration/coverage@10%": 0.03244786570450098, "calibration/coverage@15%": 0.10476929427592956, "calibration/coverage@20%": 0.1626108427103718, "calibration/coverage@25%": 0.3264279598825831, "calibration/coverage@30%": 0.5246605919765167, "calibration/coverage@5%": 0.009783206947162426, "calibration/ece": 0.12490733087500346, "calibration/mean_confidence": 0.4861851745112829, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 672.0, "completions/max_terminated_length": 672.0, "completions/mean_length": 184.3849609375, "completions/mean_terminated_length": 184.49216003417968, "completions/min_length": 35.2, "completions/min_terminated_length": 85.6, "epoch": 0.336, "grad_norm": 0.024251488968729973, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 349036856.0, "reward": 0.9335217475891113, "reward_std": 0.10637302547693253, "rewards/accgated_coverage_0": 0.016940949019044638, "rewards/accgated_coverage_1": 0.016940949019044638, "rewards/accgated_coverage_10": 0.016940949019044638, "rewards/accgated_coverage_15": 0.016940949019044638, "rewards/accgated_coverage_20": 0.016940949019044638, "rewards/accgated_coverage_25": 0.016940949019044638, "rewards/accgated_coverage_5": 0.016940949019044638, "rewards/accuracy_reward": 0.53251953125, "rewards/brier_reward": 0.7731749534606933, "rewards/confidence_uniqueness_reward": 0.9532255172729492, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002607670472934842, "rewards/frontier_ece_reward": 0.01801227778196335, "rewards/frontier_entropy_batch_reward": -0.18663570284843445, "signal/accgated_coverage_0/centered_abs_mean": 0.05794127359986305, "signal/accgated_coverage_0/group_std_mean": 0.07467034608125686, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_1/centered_abs_mean": 0.05794127359986305, "signal/accgated_coverage_1/group_std_mean": 0.07467034608125686, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_10/centered_abs_mean": 0.05794127359986305, "signal/accgated_coverage_10/group_std_mean": 0.07467034608125686, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_15/centered_abs_mean": 0.05794127359986305, "signal/accgated_coverage_15/group_std_mean": 0.07467034608125686, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_20/centered_abs_mean": 0.05794127359986305, "signal/accgated_coverage_20/group_std_mean": 0.07467034608125686, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_25/centered_abs_mean": 0.05794127359986305, "signal/accgated_coverage_25/group_std_mean": 0.07467034608125686, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_5/centered_abs_mean": 0.05794127359986305, "signal/accgated_coverage_5/group_std_mean": 0.07467034608125686, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005794127332046628, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005794127332046628, "signal/accuracy_reward/centered_abs_mean": 0.121795654296875, "signal/accuracy_reward/group_std_mean": 0.1620877206325531, "signal/accuracy_reward/group_zero_std_frac": 0.53125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0608978271484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0608978271484375, "signal/advantage_abs_mean": 0.08342937678098679, "signal/advantage_pre_scale_abs_mean": 0.08342937678098679, "signal/advantage_pre_scale_std": 0.11973823308944702, "signal/advantage_std": 0.11973823308944702, "signal/brier_reward/centered_abs_mean": 0.15785402953624725, "signal/brier_reward/group_std_mean": 0.2000586748123169, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015785403177142145, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015785403177142145, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01279226690530777, "signal/confidence_uniqueness_reward/group_std_mean": 0.017774837836623193, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012792267836630345, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012792267836630345, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_std_mean": 0.0035306816454976795, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021891113370656966, "signal/frontier_aurc_reward/group_std_mean": 0.003217978123575449, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7363891786080784e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7363891786080784e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0332314558327198, "signal/frontier_ece_reward/group_std_mean": 0.046575964987277986, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033231456764042377, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033231456764042377, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2611107021570206, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3386889636516571, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026111070811748505, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026111070811748505, "step": 105 }, { "calibration/aurc": 0.3305907231466021, "calibration/batch_distribution_entropy": 0.9634706351613571, "calibration/buffer_distribution_entropy": 0.9940211391579403, "calibration/confidence_entropy": 0.454238899033483, "calibration/coverage@0%": 0.010160072162426615, "calibration/coverage@1%": 0.010160072162426615, "calibration/coverage@10%": 0.12696076932485323, "calibration/coverage@15%": 0.25279552959882584, "calibration/coverage@20%": 0.3411264677103718, "calibration/coverage@25%": 0.4102907901174168, "calibration/coverage@30%": 0.4845355308219178, "calibration/coverage@5%": 0.042972572162426614, "calibration/ece": 0.11313356682716007, "calibration/mean_confidence": 0.4546359315943291, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 451.2, "completions/max_terminated_length": 451.2, "completions/mean_length": 185.22998046875, "completions/mean_terminated_length": 185.33853149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 88.6, "epoch": 0.352, "grad_norm": 0.0009761779219843447, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 366194027.0, "reward": 0.9029169917106629, "reward_std": 0.1013871669769287, "rewards/accgated_coverage_0": 0.02567037269473076, "rewards/accgated_coverage_1": 0.02567037269473076, "rewards/accgated_coverage_10": 0.02567037269473076, "rewards/accgated_coverage_15": 0.02567037269473076, "rewards/accgated_coverage_20": 0.02567037269473076, "rewards/accgated_coverage_25": 0.02567037269473076, "rewards/accgated_coverage_5": 0.02567037269473076, "rewards/accuracy_reward": 0.460546875, "rewards/brier_reward": 0.773802924156189, "rewards/confidence_uniqueness_reward": 0.9526262640953064, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.003255124855786562, "rewards/frontier_ece_reward": 0.014954091794788838, "rewards/frontier_entropy_batch_reward": -0.1913035809993744, "signal/accgated_coverage_0/centered_abs_mean": 0.04605281800031662, "signal/accgated_coverage_0/group_std_mean": 0.0581918366253376, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_1/centered_abs_mean": 0.04605281800031662, "signal/accgated_coverage_1/group_std_mean": 0.0581918366253376, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_10/centered_abs_mean": 0.04605281800031662, "signal/accgated_coverage_10/group_std_mean": 0.0581918366253376, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_15/centered_abs_mean": 0.04605281800031662, "signal/accgated_coverage_15/group_std_mean": 0.0581918366253376, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_20/centered_abs_mean": 0.04605281800031662, "signal/accgated_coverage_20/group_std_mean": 0.0581918366253376, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_25/centered_abs_mean": 0.04605281800031662, "signal/accgated_coverage_25/group_std_mean": 0.0581918366253376, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_5/centered_abs_mean": 0.04605281800031662, "signal/accgated_coverage_5/group_std_mean": 0.0581918366253376, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0046052816323935986, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0046052816323935986, "signal/accuracy_reward/centered_abs_mean": 0.1187255859375, "signal/accuracy_reward/group_std_mean": 0.1538717418909073, "signal/accuracy_reward/group_zero_std_frac": 0.571875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05936279296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05936279296875, "signal/advantage_abs_mean": 0.07860026806592942, "signal/advantage_pre_scale_abs_mean": 0.07860026806592942, "signal/advantage_pre_scale_std": 0.1170931875705719, "signal/advantage_std": 0.1170931875705719, "signal/brier_reward/centered_abs_mean": 0.1583779364824295, "signal/brier_reward/group_std_mean": 0.2012830913066864, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015837793983519078, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015837793983519078, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013373796828091145, "signal/confidence_uniqueness_reward/group_std_mean": 0.018645089119672775, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013373797060921787, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013373797060921787, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002646684320643544, "signal/frontier_aurc_reward/group_std_mean": 0.0039520672988146545, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.308355480839964e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.308355480839964e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.030535892769694328, "signal/frontier_ece_reward/group_std_mean": 0.04203937202692032, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003053589351475239, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003053589351475239, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.264906769990921, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3425568282604218, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026490678265690803, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026490678265690803, "step": 110 }, { "calibration/aurc": 0.39777015618484624, "calibration/batch_distribution_entropy": 0.9746749712088182, "calibration/buffer_distribution_entropy": 0.9970537948902933, "calibration/confidence_entropy": 0.4622045118054922, "calibration/coverage@0%": 0.0035163894324853227, "calibration/coverage@1%": 0.0035163894324853227, "calibration/coverage@10%": 0.016407014432485323, "calibration/coverage@15%": 0.028125764432485323, "calibration/coverage@20%": 0.22500076443248532, "calibration/coverage@25%": 0.3039093077299413, "calibration/coverage@30%": 0.3621124327299413, "calibration/coverage@5%": 0.0035163894324853227, "calibration/ece": 0.14096880096182346, "calibration/mean_confidence": 0.5173561576705936, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 560.2, "completions/max_terminated_length": 560.2, "completions/mean_length": 184.6703125, "completions/mean_terminated_length": 184.86813659667968, "completions/min_length": 0.0, "completions/min_terminated_length": 88.4, "epoch": 0.368, "grad_norm": 0.0008754940354265273, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 383150523.0, "reward": 0.9097236037254334, "reward_std": 0.10353504717350007, "rewards/accgated_coverage_0": 0.023810245096683502, "rewards/accgated_coverage_1": 0.023810245096683502, "rewards/accgated_coverage_10": 0.023810245096683502, "rewards/accgated_coverage_15": 0.023810245096683502, "rewards/accgated_coverage_20": 0.023810245096683502, "rewards/accgated_coverage_25": 0.023810245096683502, "rewards/accgated_coverage_5": 0.023810245096683502, "rewards/accuracy_reward": 0.4810546875, "rewards/brier_reward": 0.7758293151855469, "rewards/confidence_uniqueness_reward": 0.9516843318939209, "rewards/format_reward": 0.99873046875, "rewards/frontier_aurc_reward": -0.0034904766362160444, "rewards/frontier_ece_reward": 0.014134907722473144, "rewards/frontier_entropy_batch_reward": -0.2095736712217331, "signal/accgated_coverage_0/centered_abs_mean": 0.04509783834218979, "signal/accgated_coverage_0/group_std_mean": 0.058495976775884626, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_1/centered_abs_mean": 0.04509783834218979, "signal/accgated_coverage_1/group_std_mean": 0.058495976775884626, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_10/centered_abs_mean": 0.04509783834218979, "signal/accgated_coverage_10/group_std_mean": 0.058495976775884626, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_15/centered_abs_mean": 0.04509783834218979, "signal/accgated_coverage_15/group_std_mean": 0.058495976775884626, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_20/centered_abs_mean": 0.04509783834218979, "signal/accgated_coverage_20/group_std_mean": 0.058495976775884626, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_25/centered_abs_mean": 0.04509783834218979, "signal/accgated_coverage_25/group_std_mean": 0.058495976775884626, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_5/centered_abs_mean": 0.04509783834218979, "signal/accgated_coverage_5/group_std_mean": 0.058495976775884626, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004509783769026399, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004509783769026399, "signal/accuracy_reward/centered_abs_mean": 0.11456298828125, "signal/accuracy_reward/group_std_mean": 0.1523550420999527, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.057281494140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.057281494140625, "signal/advantage_abs_mean": 0.07994274199008941, "signal/advantage_pre_scale_abs_mean": 0.07994274199008941, "signal/advantage_pre_scale_std": 0.12059660255908966, "signal/advantage_std": 0.12059660255908966, "signal/brier_reward/centered_abs_mean": 0.15113866925239564, "signal/brier_reward/group_std_mean": 0.19373134672641754, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01511386651545763, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01511386651545763, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014305031299591065, "signal/confidence_uniqueness_reward/group_std_mean": 0.020990825816988946, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014305031159892677, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014305031159892677, "signal/format_reward/centered_abs_mean": 0.002423095703125, "signal/format_reward/group_std_mean": 0.00617262776941061, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012115478515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012115478515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003047790750861168, "signal/frontier_aurc_reward/group_std_mean": 0.00454138470813632, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.809738409472629e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.809738409472629e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.027387873083353043, "signal/frontier_ece_reward/group_std_mean": 0.0378493033349514, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027387873269617558, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027387873269617558, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28054032325744627, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3566042065620422, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028054032102227212, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028054032102227212, "step": 115 }, { "calibration/aurc": 0.3219032277188316, "calibration/batch_distribution_entropy": 0.9666301345587124, "calibration/buffer_distribution_entropy": 0.9982393081790967, "calibration/confidence_entropy": 0.4560697596889498, "calibration/coverage@0%": 0.030471825787401575, "calibration/coverage@1%": 0.030471825787401575, "calibration/coverage@10%": 0.20312807578740158, "calibration/coverage@15%": 0.2687530757874016, "calibration/coverage@20%": 0.3023468257874016, "calibration/coverage@25%": 0.34924643208661416, "calibration/coverage@30%": 0.39534325787401575, "calibration/coverage@5%": 0.07969057578740157, "calibration/ece": 0.13579660074192376, "calibration/mean_confidence": 0.4620772287971846, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 916.6, "completions/max_terminated_length": 916.6, "completions/mean_length": 186.89169921875, "completions/mean_terminated_length": 187.1309844970703, "completions/min_length": 0.0, "completions/min_terminated_length": 83.2, "epoch": 0.384, "grad_norm": 0.0008244336349889636, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 399920806.0, "reward": 0.9279178500175476, "reward_std": 0.0995772048830986, "rewards/accgated_coverage_0": 0.022253712080419062, "rewards/accgated_coverage_1": 0.022253712080419062, "rewards/accgated_coverage_10": 0.022253712080419062, "rewards/accgated_coverage_15": 0.022253712080419062, "rewards/accgated_coverage_20": 0.022253712080419062, "rewards/accgated_coverage_25": 0.022253712080419062, "rewards/accgated_coverage_5": 0.022253712080419062, "rewards/accuracy_reward": 0.51845703125, "rewards/brier_reward": 0.786729919910431, "rewards/confidence_uniqueness_reward": 0.9509214878082275, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.00305885705165565, "rewards/frontier_ece_reward": 0.01511908657848835, "rewards/frontier_entropy_batch_reward": -0.214434677362442, "signal/accgated_coverage_0/centered_abs_mean": 0.05044243782758713, "signal/accgated_coverage_0/group_std_mean": 0.06544317230582238, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_1/centered_abs_mean": 0.05044243782758713, "signal/accgated_coverage_1/group_std_mean": 0.06544317230582238, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_10/centered_abs_mean": 0.05044243782758713, "signal/accgated_coverage_10/group_std_mean": 0.06544317230582238, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_15/centered_abs_mean": 0.05044243782758713, "signal/accgated_coverage_15/group_std_mean": 0.06544317230582238, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_20/centered_abs_mean": 0.05044243782758713, "signal/accgated_coverage_20/group_std_mean": 0.06544317230582238, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_25/centered_abs_mean": 0.05044243782758713, "signal/accgated_coverage_25/group_std_mean": 0.06544317230582238, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_5/centered_abs_mean": 0.05044243782758713, "signal/accgated_coverage_5/group_std_mean": 0.06544317230582238, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005044243857264518, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005044243857264518, "signal/accuracy_reward/centered_abs_mean": 0.111529541015625, "signal/accuracy_reward/group_std_mean": 0.1498907119035721, "signal/accuracy_reward/group_zero_std_frac": 0.5625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0557647705078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0557647705078125, "signal/advantage_abs_mean": 0.07648073881864548, "signal/advantage_pre_scale_abs_mean": 0.07648073881864548, "signal/advantage_pre_scale_std": 0.114204902946949, "signal/advantage_std": 0.114204902946949, "signal/brier_reward/centered_abs_mean": 0.13833243846893312, "signal/brier_reward/group_std_mean": 0.17853497862815856, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01383324433118105, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01383324433118105, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014909646660089492, "signal/confidence_uniqueness_reward/group_std_mean": 0.020950117707252504, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014909646706655622, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014909646706655622, "signal/format_reward/centered_abs_mean": 0.0025390625, "signal/format_reward/group_std_mean": 0.005538491113111377, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00126953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00126953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030949720181524753, "signal/frontier_aurc_reward/group_std_mean": 0.0047924695536494255, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8687149208271875e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8687149208271875e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.023026642948389055, "signal/frontier_ece_reward/group_std_mean": 0.031030115485191346, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002302664425224066, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002302664425224066, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2783478438854218, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3542932987213135, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027834784239530563, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027834784239530563, "step": 120 }, { "calibration/aurc": 0.4324407494110575, "calibration/batch_distribution_entropy": 0.986381165469108, "calibration/buffer_distribution_entropy": 0.9980247081930601, "calibration/confidence_entropy": 0.49569577927429453, "calibration/coverage@0%": 0.003125764432485323, "calibration/coverage@1%": 0.003125764432485323, "calibration/coverage@10%": 0.003125764432485323, "calibration/coverage@15%": 0.011328889432485324, "calibration/coverage@20%": 0.02265701443248532, "calibration/coverage@25%": 0.05979849559686888, "calibration/coverage@30%": 0.18609955968688846, "calibration/coverage@5%": 0.003125764432485323, "calibration/ece": 0.1540248613869452, "calibration/mean_confidence": 0.5007551271020173, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 499.8, "completions/max_terminated_length": 499.8, "completions/mean_length": 186.47431640625, "completions/mean_terminated_length": 186.58330688476562, "completions/min_length": 16.6, "completions/min_terminated_length": 92.4, "epoch": 0.4, "grad_norm": 0.0009725225972943008, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 416866751.0, "reward": 0.9153225064277649, "reward_std": 0.10575702488422394, "rewards/accgated_coverage_0": 0.02327599683776498, "rewards/accgated_coverage_1": 0.02327599683776498, "rewards/accgated_coverage_10": 0.02327599683776498, "rewards/accgated_coverage_15": 0.02327599683776498, "rewards/accgated_coverage_20": 0.02327599683776498, "rewards/accgated_coverage_25": 0.02327599683776498, "rewards/accgated_coverage_5": 0.02327599683776498, "rewards/accuracy_reward": 0.4916015625, "rewards/brier_reward": 0.7742549061775208, "rewards/confidence_uniqueness_reward": 0.9524218559265136, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0036125687882304193, "rewards/frontier_ece_reward": 0.011607270315289497, "rewards/frontier_entropy_batch_reward": -0.20212911069393158, "signal/accgated_coverage_0/centered_abs_mean": 0.046710155159235, "signal/accgated_coverage_0/group_std_mean": 0.060144589841365816, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_1/centered_abs_mean": 0.046710155159235, "signal/accgated_coverage_1/group_std_mean": 0.060144589841365816, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_10/centered_abs_mean": 0.046710155159235, "signal/accgated_coverage_10/group_std_mean": 0.060144589841365816, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_15/centered_abs_mean": 0.046710155159235, "signal/accgated_coverage_15/group_std_mean": 0.060144589841365816, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_20/centered_abs_mean": 0.046710155159235, "signal/accgated_coverage_20/group_std_mean": 0.060144589841365816, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_25/centered_abs_mean": 0.046710155159235, "signal/accgated_coverage_25/group_std_mean": 0.060144589841365816, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_5/centered_abs_mean": 0.046710155159235, "signal/accgated_coverage_5/group_std_mean": 0.060144589841365816, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004671015590429306, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004671015590429306, "signal/accuracy_reward/centered_abs_mean": 0.1314697265625, "signal/accuracy_reward/group_std_mean": 0.16968526542186738, "signal/accuracy_reward/group_zero_std_frac": 0.528125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06573486328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06573486328125, "signal/advantage_abs_mean": 0.08297596722841263, "signal/advantage_pre_scale_abs_mean": 0.08297596722841263, "signal/advantage_pre_scale_std": 0.12231777310371399, "signal/advantage_std": 0.12231777310371399, "signal/brier_reward/centered_abs_mean": 0.14882287085056306, "signal/brier_reward/group_std_mean": 0.19086708426475524, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014882288128137588, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014882288128137588, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012974631786346436, "signal/confidence_uniqueness_reward/group_std_mean": 0.018405388668179513, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001297463197261095, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001297463197261095, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003667995473369956, "signal/frontier_aurc_reward/group_std_mean": 0.005854966584593058, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.584994530887343e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.584994530887343e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.02002083547413349, "signal/frontier_ece_reward/group_std_mean": 0.026638072356581687, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020020836032927035, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020020836032927035, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27425014078617094, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3491857171058655, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027425015717744826, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027425015717744826, "step": 125 }, { "calibration/aurc": 0.33193371781089065, "calibration/batch_distribution_entropy": 0.978694331339585, "calibration/buffer_distribution_entropy": 0.9977751089472529, "calibration/confidence_entropy": 0.49877098337715486, "calibration/coverage@0%": 0.008203125, "calibration/coverage@1%": 0.008203125, "calibration/coverage@10%": 0.032421875, "calibration/coverage@15%": 0.042578125, "calibration/coverage@20%": 0.11484375, "calibration/coverage@25%": 0.224609375, "calibration/coverage@30%": 0.3953125, "calibration/coverage@5%": 0.02109375, "calibration/ece": 0.10189078812995633, "calibration/mean_confidence": 0.513004817409997, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 464.4, "completions/max_terminated_length": 464.4, "completions/mean_length": 189.19814453125, "completions/mean_terminated_length": 189.2162292480469, "completions/min_length": 74.6, "completions/min_terminated_length": 91.4, "epoch": 0.416, "grad_norm": 0.0008635468548163772, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 433685324.0, "reward": 0.9197581887245179, "reward_std": 0.10262777209281922, "rewards/accgated_coverage_0": 0.02481077201664448, "rewards/accgated_coverage_1": 0.02481077201664448, "rewards/accgated_coverage_10": 0.02481077201664448, "rewards/accgated_coverage_15": 0.02481077201664448, "rewards/accgated_coverage_20": 0.02481077201664448, "rewards/accgated_coverage_25": 0.02481077201664448, "rewards/accgated_coverage_5": 0.02481077201664448, "rewards/accuracy_reward": 0.494921875, "rewards/brier_reward": 0.7818256974220276, "rewards/confidence_uniqueness_reward": 0.9530369281768799, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0033155861776322125, "rewards/frontier_ece_reward": 0.010740846581757068, "rewards/frontier_entropy_batch_reward": -0.1954037606716156, "signal/accgated_coverage_0/centered_abs_mean": 0.047776888310909274, "signal/accgated_coverage_0/group_std_mean": 0.06069251298904419, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_1/centered_abs_mean": 0.047776888310909274, "signal/accgated_coverage_1/group_std_mean": 0.06069251298904419, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_10/centered_abs_mean": 0.047776888310909274, "signal/accgated_coverage_10/group_std_mean": 0.06069251298904419, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_15/centered_abs_mean": 0.047776888310909274, "signal/accgated_coverage_15/group_std_mean": 0.06069251298904419, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_20/centered_abs_mean": 0.047776888310909274, "signal/accgated_coverage_20/group_std_mean": 0.06069251298904419, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_25/centered_abs_mean": 0.047776888310909274, "signal/accgated_coverage_25/group_std_mean": 0.06069251298904419, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_5/centered_abs_mean": 0.047776888310909274, "signal/accgated_coverage_5/group_std_mean": 0.06069251298904419, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004777689045295119, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004777689045295119, "signal/accuracy_reward/centered_abs_mean": 0.12591552734375, "signal/accuracy_reward/group_std_mean": 0.16535796225070953, "signal/accuracy_reward/group_zero_std_frac": 0.528125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062957763671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.062957763671875, "signal/advantage_abs_mean": 0.08131872415542603, "signal/advantage_pre_scale_abs_mean": 0.08131872415542603, "signal/advantage_pre_scale_std": 0.1183522805571556, "signal/advantage_std": 0.1183522805571556, "signal/brier_reward/centered_abs_mean": 0.14390135705471038, "signal/brier_reward/group_std_mean": 0.18296151161193847, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014390136301517486, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014390136301517486, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012548736296594143, "signal/confidence_uniqueness_reward/group_std_mean": 0.01589704118669033, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012548736296594143, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012548736296594143, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033578477799892426, "signal/frontier_aurc_reward/group_std_mean": 0.005520503781735897, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.19730982684996e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.19730982684996e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.017291248589754105, "signal/frontier_ece_reward/group_std_mean": 0.02277929149568081, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017291248077526689, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017291248077526689, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2703825652599335, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34669106006622313, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027038257196545602, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027038257196545602, "step": 130 }, { "calibration/aurc": 0.28322062243077284, "calibration/batch_distribution_entropy": 0.9741314135591935, "calibration/buffer_distribution_entropy": 0.9977025041244729, "calibration/confidence_entropy": 0.4597935578261665, "calibration/coverage@0%": 0.011730980919765165, "calibration/coverage@1%": 0.011730980919765165, "calibration/coverage@10%": 0.09582161203522505, "calibration/coverage@15%": 0.17944211717221134, "calibration/coverage@20%": 0.3279461227984345, "calibration/coverage@25%": 0.41315511863992177, "calibration/coverage@30%": 0.5660928326810175, "calibration/coverage@5%": 0.011730980919765165, "calibration/ece": 0.11671168467835376, "calibration/mean_confidence": 0.5372104429192117, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 560.0, "completions/max_terminated_length": 560.0, "completions/mean_length": 189.170703125, "completions/mean_terminated_length": 189.28175354003906, "completions/min_length": 35.8, "completions/min_terminated_length": 90.0, "epoch": 0.432, "grad_norm": 0.0016940739005804062, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 450636768.0, "reward": 0.9407005071640014, "reward_std": 0.09624196439981461, "rewards/accgated_coverage_0": 0.030050896108150482, "rewards/accgated_coverage_1": 0.030050896108150482, "rewards/accgated_coverage_10": 0.030050896108150482, "rewards/accgated_coverage_15": 0.030050896108150482, "rewards/accgated_coverage_20": 0.030050896108150482, "rewards/accgated_coverage_25": 0.030050896108150482, "rewards/accgated_coverage_5": 0.030050896108150482, "rewards/accuracy_reward": 0.5318359375, "rewards/brier_reward": 0.8001113057136535, "rewards/confidence_uniqueness_reward": 0.951182758808136, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.002726729493588209, "rewards/frontier_ece_reward": 0.012993598543107509, "rewards/frontier_entropy_batch_reward": -0.22305963337421417, "signal/accgated_coverage_0/centered_abs_mean": 0.051881562918424606, "signal/accgated_coverage_0/group_std_mean": 0.06760661378502845, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_1/centered_abs_mean": 0.051881562918424606, "signal/accgated_coverage_1/group_std_mean": 0.06760661378502845, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_10/centered_abs_mean": 0.051881562918424606, "signal/accgated_coverage_10/group_std_mean": 0.06760661378502845, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_15/centered_abs_mean": 0.051881562918424606, "signal/accgated_coverage_15/group_std_mean": 0.06760661378502845, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_20/centered_abs_mean": 0.051881562918424606, "signal/accgated_coverage_20/group_std_mean": 0.06760661378502845, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_25/centered_abs_mean": 0.051881562918424606, "signal/accgated_coverage_25/group_std_mean": 0.06760661378502845, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_5/centered_abs_mean": 0.051881562918424606, "signal/accgated_coverage_5/group_std_mean": 0.06760661378502845, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005188156617805362, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005188156617805362, "signal/accuracy_reward/centered_abs_mean": 0.1138427734375, "signal/accuracy_reward/group_std_mean": 0.15121517330408096, "signal/accuracy_reward/group_zero_std_frac": 0.565625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05692138671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05692138671875, "signal/advantage_abs_mean": 0.07415008246898651, "signal/advantage_pre_scale_abs_mean": 0.07415008246898651, "signal/advantage_pre_scale_std": 0.11101247072219848, "signal/advantage_std": 0.11101247072219848, "signal/brier_reward/centered_abs_mean": 0.1365377575159073, "signal/brier_reward/group_std_mean": 0.17550874650478362, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013653775677084923, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013653775677084923, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014425282925367355, "signal/confidence_uniqueness_reward/group_std_mean": 0.020028948225080966, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014425283297896385, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014425283297896385, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003307829750701785, "signal/frontier_aurc_reward/group_std_mean": 0.005367651302367449, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1347873047925535e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1347873047925535e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.016411469876766206, "signal/frontier_ece_reward/group_std_mean": 0.021296811848878862, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001641146931797266, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001641146931797266, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28924007415771485, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3657317876815796, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02892400659620762, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02892400659620762, "step": 135 }, { "calibration/aurc": 0.31087928071099336, "calibration/batch_distribution_entropy": 0.9721029922152737, "calibration/buffer_distribution_entropy": 0.9974103057197008, "calibration/confidence_entropy": 0.48614372582595544, "calibration/coverage@0%": 0.014067086594911937, "calibration/coverage@1%": 0.014067086594911937, "calibration/coverage@10%": 0.06722954378669276, "calibration/coverage@15%": 0.12155546722113501, "calibration/coverage@20%": 0.1950617661448141, "calibration/coverage@25%": 0.2666187622309198, "calibration/coverage@30%": 0.4077566964285714, "calibration/coverage@5%": 0.028520211594911936, "calibration/ece": 0.13346354883731723, "calibration/mean_confidence": 0.5590761304140146, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 668.8, "completions/max_terminated_length": 668.8, "completions/mean_length": 197.71796875, "completions/mean_terminated_length": 197.77550354003907, "completions/min_length": 38.8, "completions/min_terminated_length": 94.2, "epoch": 0.448, "grad_norm": 0.0009933625115081668, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 467614200.0, "reward": 0.9261441349983215, "reward_std": 0.09479031711816788, "rewards/accgated_coverage_0": 0.029728616401553155, "rewards/accgated_coverage_1": 0.029728616401553155, "rewards/accgated_coverage_10": 0.029728616401553155, "rewards/accgated_coverage_15": 0.029728616401553155, "rewards/accgated_coverage_20": 0.029728616401553155, "rewards/accgated_coverage_25": 0.02971052788197994, "rewards/accgated_coverage_5": 0.029728616401553155, "rewards/accuracy_reward": 0.50244140625, "rewards/brier_reward": 0.7914562702178956, "rewards/confidence_uniqueness_reward": 0.9523193597793579, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0033234899397939443, "rewards/frontier_ece_reward": 0.009857317991554737, "rewards/frontier_entropy_batch_reward": -0.21060078740119934, "signal/accgated_coverage_0/centered_abs_mean": 0.048491771519184115, "signal/accgated_coverage_0/group_std_mean": 0.062291745096445084, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_1/centered_abs_mean": 0.048491771519184115, "signal/accgated_coverage_1/group_std_mean": 0.062291745096445084, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_10/centered_abs_mean": 0.048491771519184115, "signal/accgated_coverage_10/group_std_mean": 0.062291745096445084, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_15/centered_abs_mean": 0.048491771519184115, "signal/accgated_coverage_15/group_std_mean": 0.062291745096445084, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_20/centered_abs_mean": 0.048491771519184115, "signal/accgated_coverage_20/group_std_mean": 0.062291745096445084, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_25/centered_abs_mean": 0.048326144367456435, "signal/accgated_coverage_25/group_std_mean": 0.062085268646478654, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004832614585757256, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004832614585757256, "signal/accgated_coverage_5/centered_abs_mean": 0.048491771519184115, "signal/accgated_coverage_5/group_std_mean": 0.062291745096445084, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004849177319556475, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004849177319556475, "signal/accuracy_reward/centered_abs_mean": 0.109490966796875, "signal/accuracy_reward/group_std_mean": 0.14455284774303437, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0547454833984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0547454833984375, "signal/advantage_abs_mean": 0.07401997745037078, "signal/advantage_pre_scale_abs_mean": 0.07401997745037078, "signal/advantage_pre_scale_std": 0.1096037745475769, "signal/advantage_std": 0.1096037745475769, "signal/brier_reward/centered_abs_mean": 0.14020991921424866, "signal/brier_reward/group_std_mean": 0.1790821671485901, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014020991884171963, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014020991884171963, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013042146898806095, "signal/confidence_uniqueness_reward/group_std_mean": 0.017226839996874334, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001304214750416577, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001304214750416577, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.00343682668171823, "signal/frontier_aurc_reward/group_std_mean": 0.005777542665600777, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2960334394592795e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2960334394592795e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.01519781704992056, "signal/frontier_ece_reward/group_std_mean": 0.019867125526070596, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015197818167507649, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015197818167507649, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2822464555501938, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3577677130699158, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028224647045135498, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028224647045135498, "step": 140 }, { "calibration/aurc": 0.4260284897856108, "calibration/batch_distribution_entropy": 0.9851075345712529, "calibration/buffer_distribution_entropy": 0.996800334802546, "calibration/confidence_entropy": 0.49255589906544583, "calibration/coverage@0%": 0.004319169944296346, "calibration/coverage@1%": 0.004319169944296346, "calibration/coverage@10%": 0.017219022306501068, "calibration/coverage@15%": 0.029380685692327844, "calibration/coverage@20%": 0.06003090714902076, "calibration/coverage@25%": 0.08594441600728847, "calibration/coverage@30%": 0.1708299967159499, "calibration/coverage@5%": 0.004319169944296346, "calibration/ece": 0.14126350573298713, "calibration/mean_confidence": 0.4790509413100503, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 794.2, "completions/max_terminated_length": 794.2, "completions/mean_length": 202.572265625, "completions/mean_terminated_length": 202.75009155273438, "completions/min_length": 20.8, "completions/min_terminated_length": 97.4, "epoch": 0.464, "grad_norm": 0.0008447124273516238, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 484859356.0, "reward": 0.8944996356964111, "reward_std": 0.09351845979690551, "rewards/accgated_coverage_0": 0.025238432362675665, "rewards/accgated_coverage_1": 0.025238432362675665, "rewards/accgated_coverage_10": 0.025238432362675665, "rewards/accgated_coverage_15": 0.025238432362675665, "rewards/accgated_coverage_20": 0.025238432362675665, "rewards/accgated_coverage_25": 0.02511085756123066, "rewards/accgated_coverage_5": 0.025238432362675665, "rewards/accuracy_reward": 0.45126953125, "rewards/brier_reward": 0.7709176063537597, "rewards/confidence_uniqueness_reward": 0.9511044979095459, "rewards/format_reward": 0.99892578125, "rewards/frontier_aurc_reward": -0.0037610166240483523, "rewards/frontier_ece_reward": 0.007047755550593138, "rewards/frontier_entropy_batch_reward": -0.21112147867679595, "signal/accgated_coverage_0/centered_abs_mean": 0.037470953166484834, "signal/accgated_coverage_0/group_std_mean": 0.048257572948932646, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_1/centered_abs_mean": 0.037470953166484834, "signal/accgated_coverage_1/group_std_mean": 0.048257572948932646, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_10/centered_abs_mean": 0.037470953166484834, "signal/accgated_coverage_10/group_std_mean": 0.048257572948932646, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_15/centered_abs_mean": 0.037470953166484834, "signal/accgated_coverage_15/group_std_mean": 0.048257572948932646, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_20/centered_abs_mean": 0.037470953166484834, "signal/accgated_coverage_20/group_std_mean": 0.048257572948932646, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_25/centered_abs_mean": 0.03708973340690136, "signal/accgated_coverage_25/group_std_mean": 0.04777633249759674, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.003708973526954651, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.003708973526954651, "signal/accgated_coverage_5/centered_abs_mean": 0.037470953166484834, "signal/accgated_coverage_5/group_std_mean": 0.048257572948932646, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0037470953073352577, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0037470953073352577, "signal/accuracy_reward/centered_abs_mean": 0.093695068359375, "signal/accuracy_reward/group_std_mean": 0.1306929975748062, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0468475341796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0468475341796875, "signal/advantage_abs_mean": 0.0707702711224556, "signal/advantage_pre_scale_abs_mean": 0.0707702711224556, "signal/advantage_pre_scale_std": 0.10955794602632522, "signal/advantage_std": 0.10955794602632522, "signal/brier_reward/centered_abs_mean": 0.14058507978916168, "signal/brier_reward/group_std_mean": 0.18082630336284639, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014058507792651654, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014058507792651654, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014609797485172748, "signal/confidence_uniqueness_reward/group_std_mean": 0.021237166598439217, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001460979785770178, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001460979785770178, "signal/format_reward/centered_abs_mean": 0.002069091796875, "signal/format_reward/group_std_mean": 0.005740390298888088, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003325316496193409, "signal/frontier_aurc_reward/group_std_mean": 0.005579947866499424, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.156645809416659e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.156645809416659e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.012721736542880535, "signal/frontier_ece_reward/group_std_mean": 0.01700245440006256, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012721736915409566, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012721736915409566, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2733992040157318, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3470227658748627, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02733992077410221, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02733992077410221, "step": 145 }, { "calibration/aurc": 0.28795121430498016, "calibration/batch_distribution_entropy": 0.975302575403972, "calibration/buffer_distribution_entropy": 0.9960413492852078, "calibration/confidence_entropy": 0.47715076131859047, "calibration/coverage@0%": 0.022699822651663405, "calibration/coverage@1%": 0.022699822651663405, "calibration/coverage@10%": 0.0724062805772994, "calibration/coverage@15%": 0.2818646037181996, "calibration/coverage@20%": 0.37693554305283755, "calibration/coverage@25%": 0.44147734222113505, "calibration/coverage@30%": 0.5005488625244618, "calibration/coverage@5%": 0.03718123165362035, "calibration/ece": 0.15053341353769256, "calibration/mean_confidence": 0.49011041477646156, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 679.8, "completions/max_terminated_length": 679.8, "completions/mean_length": 202.91337890625, "completions/mean_terminated_length": 203.13193969726564, "completions/min_length": 0.0, "completions/min_terminated_length": 99.4, "epoch": 0.48, "grad_norm": 0.0010805472265928984, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 501985221.0, "reward": 0.9265949487686157, "reward_std": 0.09515149295330047, "rewards/accgated_coverage_0": 0.028273304179310798, "rewards/accgated_coverage_1": 0.028273304179310798, "rewards/accgated_coverage_10": 0.028273304179310798, "rewards/accgated_coverage_15": 0.028273304179310798, "rewards/accgated_coverage_20": 0.028273304179310798, "rewards/accgated_coverage_25": 0.028124842047691345, "rewards/accgated_coverage_5": 0.028273304179310798, "rewards/accuracy_reward": 0.50966796875, "rewards/brier_reward": 0.7756298780441284, "rewards/confidence_uniqueness_reward": 0.9513458490371705, "rewards/format_reward": 0.99892578125, "rewards/frontier_aurc_reward": -0.0029657317558303476, "rewards/frontier_ece_reward": 0.007657552417367697, "rewards/frontier_entropy_batch_reward": -0.20904635787010192, "signal/accgated_coverage_0/centered_abs_mean": 0.0533129021525383, "signal/accgated_coverage_0/group_std_mean": 0.0669943556189537, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_1/centered_abs_mean": 0.0533129021525383, "signal/accgated_coverage_1/group_std_mean": 0.0669943556189537, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_10/centered_abs_mean": 0.0533129021525383, "signal/accgated_coverage_10/group_std_mean": 0.0669943556189537, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_15/centered_abs_mean": 0.0533129021525383, "signal/accgated_coverage_15/group_std_mean": 0.0669943556189537, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_20/centered_abs_mean": 0.0533129021525383, "signal/accgated_coverage_20/group_std_mean": 0.0669943556189537, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_25/centered_abs_mean": 0.05262741148471832, "signal/accgated_coverage_25/group_std_mean": 0.06615648269653321, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005262741353362799, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005262741353362799, "signal/accgated_coverage_5/centered_abs_mean": 0.0533129021525383, "signal/accgated_coverage_5/group_std_mean": 0.0669943556189537, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00533129028044641, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00533129028044641, "signal/accuracy_reward/centered_abs_mean": 0.123687744140625, "signal/accuracy_reward/group_std_mean": 0.15795093774795532, "signal/accuracy_reward/group_zero_std_frac": 0.565625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0618438720703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0618438720703125, "signal/advantage_abs_mean": 0.07340479493141175, "signal/advantage_pre_scale_abs_mean": 0.07340479493141175, "signal/advantage_pre_scale_std": 0.11030431389808655, "signal/advantage_std": 0.11030431389808655, "signal/brier_reward/centered_abs_mean": 0.1439410626888275, "signal/brier_reward/group_std_mean": 0.1817007005214691, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014394106343388557, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014394106343388557, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014166798628866672, "signal/confidence_uniqueness_reward/group_std_mean": 0.02084354721009731, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014166798675432802, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014166798675432802, "signal/format_reward/centered_abs_mean": 0.002081298828125, "signal/format_reward/group_std_mean": 0.006076698750257492, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002875799732282758, "signal/frontier_aurc_reward/group_std_mean": 0.00463168453425169, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.594749650801532e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.594749650801532e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.013105543702840805, "signal/frontier_ece_reward/group_std_mean": 0.017056282609701157, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013105543795973063, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013105543795973063, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2596915900707245, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3349157810211182, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025969159603118897, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025969159603118897, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.4669723018480348, "eval_calibration/batch_distribution_entropy": 0.919400066519976, "eval_calibration/buffer_distribution_entropy": 0.9955915939821773, "eval_calibration/confidence_entropy": 0.4693234243555436, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.0625, "eval_calibration/coverage@20%": 0.171875, "eval_calibration/coverage@25%": 0.1953125, "eval_calibration/coverage@30%": 0.2890625, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.18956857599273347, "eval_calibration/mean_confidence": 0.4655063869304471, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 486.0, "eval_completions/max_terminated_length": 486.0, "eval_completions/mean_length": 202.505859375, "eval_completions/mean_terminated_length": 202.505859375, "eval_completions/min_length": 109.0, "eval_completions/min_terminated_length": 109.0, "eval_loss": 0.0, "eval_num_tokens": 501985221.0, "eval_reward": 0.7895693480968475, "eval_reward_std": 0.2150205746293068, "eval_rewards/accgated_coverage_0": 0.028790025506168604, "eval_rewards/accgated_coverage_1": 0.028790025506168604, "eval_rewards/accgated_coverage_10": 0.028790025506168604, "eval_rewards/accgated_coverage_15": 0.028790025506168604, "eval_rewards/accgated_coverage_20": 0.028790025506168604, "eval_rewards/accgated_coverage_25": 0.028504140209406614, "eval_rewards/accgated_coverage_5": 0.028790025506168604, "eval_rewards/accuracy_reward": 0.400390625, "eval_rewards/brier_reward": 0.7909315228462219, "eval_rewards/confidence_uniqueness_reward": 0.8935546875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003576983988750726, "eval_rewards/frontier_ece_reward": 0.00845679291523993, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 23.3411, "eval_samples_per_second": 21.421, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.05911190249025822, "eval_signal/accgated_coverage_0/group_std_mean": 0.07272995077073574, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.05911190249025822, "eval_signal/accgated_coverage_1/group_std_mean": 0.07272995077073574, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.05911190249025822, "eval_signal/accgated_coverage_10/group_std_mean": 0.07272995077073574, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.05911190249025822, "eval_signal/accgated_coverage_15/group_std_mean": 0.07272995077073574, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.05911190249025822, "eval_signal/accgated_coverage_20/group_std_mean": 0.07272995077073574, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.0584586663171649, "eval_signal/accgated_coverage_25/group_std_mean": 0.07195482775568962, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005845866515301168, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005845866515301168, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.05911190249025822, "eval_signal/accgated_coverage_5/group_std_mean": 0.07272995077073574, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005911190528422594, "eval_signal/accuracy_reward/centered_abs_mean": 0.4656982421875, "eval_signal/accuracy_reward/group_std_mean": 0.48971545696258545, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23284912109375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23284912109375, "eval_signal/advantage_abs_mean": 0.1962505839765072, "eval_signal/advantage_pre_scale_abs_mean": 0.1962505839765072, "eval_signal/advantage_pre_scale_std": 0.21267764642834663, "eval_signal/advantage_std": 0.21267764642834663, "eval_signal/brier_reward/centered_abs_mean": 0.19743013009428978, "eval_signal/brier_reward/group_std_mean": 0.25094591453671455, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019743012730032206, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019743012730032206, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0434112548828125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05236371420323849, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004341125604696572, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004341125604696572, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004231699742376804, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008058041683398187, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.2896246415912174e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.2896246415912174e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.014492101734504104, "eval_signal/frontier_ece_reward/group_std_mean": 0.02034526690840721, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014492101909127086, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014492101909127086, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.171, "step": 150 }, { "calibration/aurc": 0.39311472877279846, "calibration/batch_distribution_entropy": 0.9818095415642116, "calibration/buffer_distribution_entropy": 0.995423724980441, "calibration/confidence_entropy": 0.4736793499329807, "calibration/coverage@0%": 0.009375, "calibration/coverage@1%": 0.009375, "calibration/coverage@10%": 0.094140625, "calibration/coverage@15%": 0.15859375, "calibration/coverage@20%": 0.210546875, "calibration/coverage@25%": 0.242578125, "calibration/coverage@30%": 0.278515625, "calibration/coverage@5%": 0.0234375, "calibration/ece": 0.13816226354604164, "calibration/mean_confidence": 0.5170572832163598, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 656.8, "completions/max_terminated_length": 656.8, "completions/mean_length": 201.83125, "completions/mean_terminated_length": 201.97061157226562, "completions/min_length": 40.6, "completions/min_terminated_length": 100.2, "epoch": 0.496, "grad_norm": 0.0009272120660170913, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 519359813.0, "reward": 0.937790048122406, "reward_std": 0.09639699459075927, "rewards/accgated_coverage_0": 0.023583621345460416, "rewards/accgated_coverage_1": 0.023583621345460416, "rewards/accgated_coverage_10": 0.023583621345460416, "rewards/accgated_coverage_15": 0.023583621345460416, "rewards/accgated_coverage_20": 0.023583448119461538, "rewards/accgated_coverage_25": 0.02300034649670124, "rewards/accgated_coverage_5": 0.023583621345460416, "rewards/accuracy_reward": 0.53125, "rewards/brier_reward": 0.7865365624427796, "rewards/confidence_uniqueness_reward": 0.953377628326416, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0031201265286654235, "rewards/frontier_ece_reward": 0.00805421993136406, "rewards/frontier_entropy_batch_reward": -0.18652375936508178, "signal/accgated_coverage_0/centered_abs_mean": 0.050162599235773084, "signal/accgated_coverage_0/group_std_mean": 0.06499579325318336, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_1/centered_abs_mean": 0.050162599235773084, "signal/accgated_coverage_1/group_std_mean": 0.06499579325318336, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_10/centered_abs_mean": 0.050162599235773084, "signal/accgated_coverage_10/group_std_mean": 0.06499579325318336, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_15/centered_abs_mean": 0.050162599235773084, "signal/accgated_coverage_15/group_std_mean": 0.06499579325318336, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_20/centered_abs_mean": 0.05015862360596657, "signal/accgated_coverage_20/group_std_mean": 0.06499083563685418, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005015862174332142, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005015862174332142, "signal/accgated_coverage_25/centered_abs_mean": 0.04882904663681984, "signal/accgated_coverage_25/group_std_mean": 0.06331825703382492, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004882904980331659, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004882904980331659, "signal/accgated_coverage_5/centered_abs_mean": 0.050162599235773084, "signal/accgated_coverage_5/group_std_mean": 0.06499579325318336, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005016259755939246, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005016259755939246, "signal/accuracy_reward/centered_abs_mean": 0.1035400390625, "signal/accuracy_reward/group_std_mean": 0.1395555779337883, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05177001953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05177001953125, "signal/advantage_abs_mean": 0.07500568479299545, "signal/advantage_pre_scale_abs_mean": 0.07500568479299545, "signal/advantage_pre_scale_std": 0.11144567281007767, "signal/advantage_std": 0.11144567281007767, "signal/brier_reward/centered_abs_mean": 0.1344868689775467, "signal/brier_reward/group_std_mean": 0.173132461309433, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013448686897754669, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013448686897754669, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012536250613629818, "signal/confidence_uniqueness_reward/group_std_mean": 0.018192836456000804, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012536250753328205, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012536250753328205, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417306780815, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033366796094924213, "signal/frontier_aurc_reward/group_std_mean": 0.005428153648972511, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1708495700731874e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1708495700731874e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.012212376296520232, "signal/frontier_ece_reward/group_std_mean": 0.01619528718292713, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001221237680874765, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001221237680874765, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2617306888103485, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33620988130569457, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026173070073127747, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026173070073127747, "step": 155 }, { "calibration/aurc": 0.3150095805056489, "calibration/batch_distribution_entropy": 0.9817100467714981, "calibration/buffer_distribution_entropy": 0.9952144938154825, "calibration/confidence_entropy": 0.49986996249011506, "calibration/coverage@0%": 0.024613197162426614, "calibration/coverage@1%": 0.024613197162426614, "calibration/coverage@10%": 0.18164444716242661, "calibration/coverage@15%": 0.2511756971624266, "calibration/coverage@20%": 0.3281479329745597, "calibration/coverage@25%": 0.398120260518591, "calibration/coverage@30%": 0.5111217282289628, "calibration/coverage@5%": 0.09414444716242662, "calibration/ece": 0.1394730920177929, "calibration/mean_confidence": 0.5252188455799971, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 805.2, "completions/max_terminated_length": 805.2, "completions/mean_length": 197.9140625, "completions/mean_terminated_length": 198.00892639160156, "completions/min_length": 40.2, "completions/min_terminated_length": 99.6, "epoch": 0.512, "grad_norm": 0.000822938047349453, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 536532117.0, "reward": 0.9397272348403931, "reward_std": 0.0966149166226387, "rewards/accgated_coverage_0": 0.02687025871127844, "rewards/accgated_coverage_1": 0.02687025871127844, "rewards/accgated_coverage_10": 0.026870235241949557, "rewards/accgated_coverage_15": 0.026869393698871136, "rewards/accgated_coverage_20": 0.02685362957417965, "rewards/accgated_coverage_25": 0.02521761693060398, "rewards/accgated_coverage_5": 0.02687025871127844, "rewards/accuracy_reward": 0.52978515625, "rewards/brier_reward": 0.7993659853935242, "rewards/confidence_uniqueness_reward": 0.953278124332428, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0030142725445330143, "rewards/frontier_ece_reward": 0.008514054864645005, "rewards/frontier_entropy_batch_reward": -0.19641512036323547, "signal/accgated_coverage_0/centered_abs_mean": 0.04578293636441231, "signal/accgated_coverage_0/group_std_mean": 0.06032358705997467, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004578293673694134, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004578293673694134, "signal/accgated_coverage_1/centered_abs_mean": 0.04578293636441231, "signal/accgated_coverage_1/group_std_mean": 0.06032358705997467, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004578293673694134, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004578293673694134, "signal/accgated_coverage_10/centered_abs_mean": 0.045782843977212904, "signal/accgated_coverage_10/group_std_mean": 0.060323466360569, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004578284453600645, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004578284453600645, "signal/accgated_coverage_15/centered_abs_mean": 0.045781036466360094, "signal/accgated_coverage_15/group_std_mean": 0.06032109335064888, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0045781034044921395, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0045781034044921395, "signal/accgated_coverage_20/centered_abs_mean": 0.045747237652540206, "signal/accgated_coverage_20/group_std_mean": 0.06027765348553658, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00457472400739789, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00457472400739789, "signal/accgated_coverage_25/centered_abs_mean": 0.042320456355810165, "signal/accgated_coverage_25/group_std_mean": 0.055887801200151445, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0042320455890148875, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0042320455890148875, "signal/accgated_coverage_5/centered_abs_mean": 0.04578293636441231, "signal/accgated_coverage_5/group_std_mean": 0.06032358705997467, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004578293673694134, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004578293673694134, "signal/accuracy_reward/centered_abs_mean": 0.107086181640625, "signal/accuracy_reward/group_std_mean": 0.14316701889038086, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0535430908203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0535430908203125, "signal/advantage_abs_mean": 0.07598457634449005, "signal/advantage_pre_scale_abs_mean": 0.07598457634449005, "signal/advantage_pre_scale_std": 0.11298816949129105, "signal/advantage_std": 0.11298816949129105, "signal/brier_reward/centered_abs_mean": 0.12746050655841829, "signal/brier_reward/group_std_mean": 0.16615513563156128, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012746050581336021, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012746050581336021, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012305822782218456, "signal/confidence_uniqueness_reward/group_std_mean": 0.01691434346139431, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012305822689086198, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012305822689086198, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034232284408062695, "signal/frontier_aurc_reward/group_std_mean": 0.00573572600260377, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.279035638319328e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.279035638319328e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.011389912478625775, "signal/frontier_ece_reward/group_std_mean": 0.014997617527842521, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011389912338927387, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011389912338927387, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27113571763038635, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3451143801212311, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027113571763038635, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027113571763038635, "step": 160 }, { "calibration/aurc": 0.22061787661354462, "calibration/batch_distribution_entropy": 0.9858973245996058, "calibration/buffer_distribution_entropy": 0.9956247437997872, "calibration/confidence_entropy": 0.4769876723458033, "calibration/coverage@0%": 0.03400807240704501, "calibration/coverage@1%": 0.03400807240704501, "calibration/coverage@10%": 0.2533459209882583, "calibration/coverage@15%": 0.3706259173189824, "calibration/coverage@20%": 0.5038940190802348, "calibration/coverage@25%": 0.6262154476516635, "calibration/coverage@30%": 0.7200090203033268, "calibration/coverage@5%": 0.11651862157534247, "calibration/ece": 0.10647224595062652, "calibration/mean_confidence": 0.51159660174583, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 762.6, "completions/max_terminated_length": 762.6, "completions/mean_length": 201.21201171875, "completions/mean_terminated_length": 201.38941650390626, "completions/min_length": 0.0, "completions/min_terminated_length": 102.2, "epoch": 0.528, "grad_norm": 0.0009442372247576714, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 553622064.0, "reward": 0.9389090180397034, "reward_std": 0.09562487751245499, "rewards/accgated_coverage_0": 0.029423439130187034, "rewards/accgated_coverage_1": 0.029423439130187034, "rewards/accgated_coverage_10": 0.029423421248793602, "rewards/accgated_coverage_15": 0.029422985576093196, "rewards/accgated_coverage_20": 0.029388283006846906, "rewards/accgated_coverage_25": 0.02592604709789157, "rewards/accgated_coverage_5": 0.029423439130187034, "rewards/accuracy_reward": 0.52666015625, "rewards/brier_reward": 0.8011533975601196, "rewards/confidence_uniqueness_reward": 0.9519981503486633, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.0027007147902622817, "rewards/frontier_ece_reward": 0.007845096942037343, "rewards/frontier_entropy_batch_reward": -0.20241765975952147, "signal/accgated_coverage_0/centered_abs_mean": 0.051184893399477, "signal/accgated_coverage_0/group_std_mean": 0.06596878245472908, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005118489358574152, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005118489358574152, "signal/accgated_coverage_1/centered_abs_mean": 0.051184893399477, "signal/accgated_coverage_1/group_std_mean": 0.06596878245472908, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005118489358574152, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005118489358574152, "signal/accgated_coverage_10/centered_abs_mean": 0.05118483528494835, "signal/accgated_coverage_10/group_std_mean": 0.06596870943903924, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00511848358437419, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00511848358437419, "signal/accgated_coverage_15/centered_abs_mean": 0.0511826254427433, "signal/accgated_coverage_15/group_std_mean": 0.06596593111753464, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005118262674659491, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005118262674659491, "signal/accgated_coverage_20/centered_abs_mean": 0.051091020554304124, "signal/accgated_coverage_20/group_std_mean": 0.06585049033164977, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005109101999551058, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005109101999551058, "signal/accgated_coverage_25/centered_abs_mean": 0.043202555179595946, "signal/accgated_coverage_25/group_std_mean": 0.05595867335796356, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004320255620405078, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004320255620405078, "signal/accgated_coverage_5/centered_abs_mean": 0.051184893399477, "signal/accgated_coverage_5/group_std_mean": 0.06596878245472908, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005118489358574152, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005118489358574152, "signal/accuracy_reward/centered_abs_mean": 0.121319580078125, "signal/accuracy_reward/group_std_mean": 0.15382195711135865, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0606597900390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0606597900390625, "signal/advantage_abs_mean": 0.07463176250457763, "signal/advantage_pre_scale_abs_mean": 0.07463176250457763, "signal/advantage_pre_scale_std": 0.11222641915082932, "signal/advantage_std": 0.11222641915082932, "signal/brier_reward/centered_abs_mean": 0.12696380019187928, "signal/brier_reward/group_std_mean": 0.16435499489307404, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01269638016819954, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01269638016819954, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013721022568643094, "signal/confidence_uniqueness_reward/group_std_mean": 0.020104555040597917, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013721022522076964, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013721022522076964, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.00552427158690989, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030401684809476135, "signal/frontier_aurc_reward/group_std_mean": 0.0051263408735394474, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.800210797635373e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.800210797635373e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009970997087657451, "signal/frontier_ece_reward/group_std_mean": 0.013070202618837356, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009970997925847769, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009970997925847769, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26574829816818235, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34063880443572997, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026574830710887908, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026574830710887908, "step": 165 }, { "calibration/aurc": 0.2375771201771093, "calibration/batch_distribution_entropy": 0.9782534276807887, "calibration/buffer_distribution_entropy": 0.9957714705066371, "calibration/confidence_entropy": 0.4679197371894765, "calibration/coverage@0%": 0.024221807729941293, "calibration/coverage@1%": 0.024221807729941293, "calibration/coverage@10%": 0.15002522627201564, "calibration/coverage@15%": 0.2863648177592955, "calibration/coverage@20%": 0.436377813111546, "calibration/coverage@25%": 0.6114573140900196, "calibration/coverage@30%": 0.700177348336595, "calibration/coverage@5%": 0.08086243272994129, "calibration/ece": 0.06541616967629962, "calibration/mean_confidence": 0.5300458698977398, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 735.8, "completions/max_terminated_length": 735.8, "completions/mean_length": 205.46064453125, "completions/mean_terminated_length": 205.6419891357422, "completions/min_length": 18.6, "completions/min_terminated_length": 98.0, "epoch": 0.544, "grad_norm": 0.0008544324082322419, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 570889565.0, "reward": 0.9480875849723815, "reward_std": 0.09817802309989929, "rewards/accgated_coverage_0": 0.024963770434260367, "rewards/accgated_coverage_1": 0.024963770434260367, "rewards/accgated_coverage_10": 0.024963770434260367, "rewards/accgated_coverage_15": 0.02495814934372902, "rewards/accgated_coverage_20": 0.024749431759119034, "rewards/accgated_coverage_25": 0.020984043180942536, "rewards/accgated_coverage_5": 0.024963770434260367, "rewards/accuracy_reward": 0.55380859375, "rewards/brier_reward": 0.7872178554534912, "rewards/confidence_uniqueness_reward": 0.9521996140480041, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.002863905020058155, "rewards/frontier_ece_reward": 0.0063414408825337885, "rewards/frontier_entropy_batch_reward": -0.19972009658813478, "signal/accgated_coverage_0/centered_abs_mean": 0.05637867748737335, "signal/accgated_coverage_0/group_std_mean": 0.07339582145214081, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00563786793500185, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00563786793500185, "signal/accgated_coverage_1/centered_abs_mean": 0.05637867748737335, "signal/accgated_coverage_1/group_std_mean": 0.07339582145214081, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00563786793500185, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00563786793500185, "signal/accgated_coverage_10/centered_abs_mean": 0.05637867748737335, "signal/accgated_coverage_10/group_std_mean": 0.07339582145214081, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00563786793500185, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00563786793500185, "signal/accgated_coverage_15/centered_abs_mean": 0.05637001916766167, "signal/accgated_coverage_15/group_std_mean": 0.0733846127986908, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0056370020844042305, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0056370020844042305, "signal/accgated_coverage_20/centered_abs_mean": 0.055931567400693896, "signal/accgated_coverage_20/group_std_mean": 0.07282639741897583, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005593156814575196, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005593156814575196, "signal/accgated_coverage_25/centered_abs_mean": 0.043465451896190645, "signal/accgated_coverage_25/group_std_mean": 0.0568954698741436, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004346545320004225, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004346545320004225, "signal/accgated_coverage_5/centered_abs_mean": 0.05637867748737335, "signal/accgated_coverage_5/group_std_mean": 0.07339582145214081, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00563786793500185, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00563786793500185, "signal/accuracy_reward/centered_abs_mean": 0.124334716796875, "signal/accuracy_reward/group_std_mean": 0.1655841737985611, "signal/accuracy_reward/group_zero_std_frac": 0.528125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0621673583984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0621673583984375, "signal/advantage_abs_mean": 0.07526060044765473, "signal/advantage_pre_scale_abs_mean": 0.07526060044765473, "signal/advantage_pre_scale_std": 0.11205310374498367, "signal/advantage_std": 0.11205310374498367, "signal/brier_reward/centered_abs_mean": 0.1371670126914978, "signal/brier_reward/group_std_mean": 0.17753443121910095, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013716701604425906, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013716701604425906, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013424108549952508, "signal/confidence_uniqueness_reward/group_std_mean": 0.01916743740439415, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013424108503386379, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013424108503386379, "signal/format_reward/centered_abs_mean": 0.001690673828125, "signal/format_reward/group_std_mean": 0.004635535972192883, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003059108229354024, "signal/frontier_aurc_reward/group_std_mean": 0.005276176985353232, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.823885344900191e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.823885344900191e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.009803688712418079, "signal/frontier_ece_reward/group_std_mean": 0.012898603454232216, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000980368908494711, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000980368908494711, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26505613327026367, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3396241843700409, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026505614444613457, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026505614444613457, "step": 170 }, { "calibration/aurc": 0.25894136057077977, "calibration/batch_distribution_entropy": 0.9847305140826528, "calibration/buffer_distribution_entropy": 0.9956841179631131, "calibration/confidence_entropy": 0.4782007518896073, "calibration/coverage@0%": 0.06133041829745597, "calibration/coverage@1%": 0.10586166829745598, "calibration/coverage@10%": 0.248049168297456, "calibration/coverage@15%": 0.3293610873287671, "calibration/coverage@20%": 0.3966150929549902, "calibration/coverage@25%": 0.4638652764187867, "calibration/coverage@30%": 0.6123845706947162, "calibration/coverage@5%": 0.21445541829745599, "calibration/ece": 0.11833698865364059, "calibration/mean_confidence": 0.49375283827476124, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 695.6, "completions/max_terminated_length": 695.6, "completions/mean_length": 209.71845703125, "completions/mean_terminated_length": 209.7785430908203, "completions/min_length": 63.6, "completions/min_terminated_length": 103.2, "epoch": 0.56, "grad_norm": 0.0007731578662060201, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 587858490.0, "reward": 0.9307716369628907, "reward_std": 0.09170439690351487, "rewards/accgated_coverage_0": 0.028712420910596847, "rewards/accgated_coverage_1": 0.028712420910596847, "rewards/accgated_coverage_10": 0.028705807775259017, "rewards/accgated_coverage_15": 0.028688276931643487, "rewards/accgated_coverage_20": 0.02831815704703331, "rewards/accgated_coverage_25": 0.023915531113743782, "rewards/accgated_coverage_5": 0.028712420910596847, "rewards/accuracy_reward": 0.5115234375, "rewards/brier_reward": 0.8015724778175354, "rewards/confidence_uniqueness_reward": 0.9525643587112427, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0028625247068703175, "rewards/frontier_ece_reward": 0.006507827714085579, "rewards/frontier_entropy_batch_reward": -0.20399945378303527, "signal/accgated_coverage_0/centered_abs_mean": 0.04774615317583084, "signal/accgated_coverage_0/group_std_mean": 0.0612909272313118, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004774615447968244, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004774615447968244, "signal/accgated_coverage_1/centered_abs_mean": 0.04774615317583084, "signal/accgated_coverage_1/group_std_mean": 0.0612909272313118, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004774615447968244, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004774615447968244, "signal/accgated_coverage_10/centered_abs_mean": 0.047740576416254045, "signal/accgated_coverage_10/group_std_mean": 0.061283988505601884, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004774057678878308, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004774057678878308, "signal/accgated_coverage_15/centered_abs_mean": 0.04771261513233185, "signal/accgated_coverage_15/group_std_mean": 0.061248501390218736, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004771261801943183, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004771261801943183, "signal/accgated_coverage_20/centered_abs_mean": 0.04683285281062126, "signal/accgated_coverage_20/group_std_mean": 0.06014049053192139, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0046832853928208355, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0046832853928208355, "signal/accgated_coverage_25/centered_abs_mean": 0.034598128870129585, "signal/accgated_coverage_25/group_std_mean": 0.04468399733304977, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.003459813119843602, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.003459813119843602, "signal/accgated_coverage_5/centered_abs_mean": 0.04774615317583084, "signal/accgated_coverage_5/group_std_mean": 0.0612909272313118, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004774615447968244, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004774615447968244, "signal/accuracy_reward/centered_abs_mean": 0.10382080078125, "signal/accuracy_reward/group_std_mean": 0.14004869312047957, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051910400390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051910400390625, "signal/advantage_abs_mean": 0.07106384858489037, "signal/advantage_pre_scale_abs_mean": 0.07106384858489037, "signal/advantage_pre_scale_std": 0.10787554085254669, "signal/advantage_std": 0.10787554085254669, "signal/brier_reward/centered_abs_mean": 0.12780316174030304, "signal/brier_reward/group_std_mean": 0.16514424681663514, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01278031598776579, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01278031598776579, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012605937756597995, "signal/confidence_uniqueness_reward/group_std_mean": 0.016831617429852487, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012605937663465738, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012605937663465738, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00284982449375093, "signal/frontier_aurc_reward/group_std_mean": 0.004916798043996096, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5622806171886624e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5622806171886624e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.008643861301243306, "signal/frontier_ece_reward/group_std_mean": 0.011418106593191623, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008643861394375563, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008643861394375563, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2669390320777893, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34026256799697874, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026693902909755707, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026693902909755707, "step": 175 }, { "calibration/aurc": 0.3135997210381514, "calibration/batch_distribution_entropy": 0.9833378524753092, "calibration/buffer_distribution_entropy": 0.9960817327032949, "calibration/confidence_entropy": 0.48438792514362294, "calibration/coverage@0%": 0.015628063725490197, "calibration/coverage@1%": 0.015628063725490197, "calibration/coverage@10%": 0.0859405637254902, "calibration/coverage@15%": 0.1921905637254902, "calibration/coverage@20%": 0.27736825980392155, "calibration/coverage@25%": 0.38324908088235293, "calibration/coverage@30%": 0.48799325980392155, "calibration/coverage@5%": 0.015628063725490197, "calibration/ece": 0.0884428689859307, "calibration/mean_confidence": 0.4981117414839547, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 578.0, "completions/max_terminated_length": 578.0, "completions/mean_length": 209.08427734375, "completions/mean_terminated_length": 209.2507751464844, "completions/min_length": 22.4, "completions/min_terminated_length": 100.6, "epoch": 0.576, "grad_norm": 0.0007929888088256121, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 605186137.0, "reward": 0.9259281992912293, "reward_std": 0.08852628320455551, "rewards/accgated_coverage_0": 0.026113039441406728, "rewards/accgated_coverage_1": 0.026113039441406728, "rewards/accgated_coverage_10": 0.02611410915851593, "rewards/accgated_coverage_15": 0.02612158302217722, "rewards/accgated_coverage_20": 0.026075875945389272, "rewards/accgated_coverage_25": 0.020272112637758254, "rewards/accgated_coverage_5": 0.026113039441406728, "rewards/accuracy_reward": 0.50966796875, "rewards/brier_reward": 0.7865149855613709, "rewards/confidence_uniqueness_reward": 0.9518932700157166, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.00332046071998775, "rewards/frontier_ece_reward": 0.005302710318937897, "rewards/frontier_entropy_batch_reward": -0.20537003576755525, "signal/accgated_coverage_0/centered_abs_mean": 0.04404938668012619, "signal/accgated_coverage_0/group_std_mean": 0.05746869742870331, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004404938966035843, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004404938966035843, "signal/accgated_coverage_1/centered_abs_mean": 0.04404938668012619, "signal/accgated_coverage_1/group_std_mean": 0.05746869742870331, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004404938966035843, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004404938966035843, "signal/accgated_coverage_10/centered_abs_mean": 0.04404330998659134, "signal/accgated_coverage_10/group_std_mean": 0.057460909336805345, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004404331091791391, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004404331091791391, "signal/accgated_coverage_15/centered_abs_mean": 0.0439866840839386, "signal/accgated_coverage_15/group_std_mean": 0.05738692060112953, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004398668650537729, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004398668650537729, "signal/accgated_coverage_20/centered_abs_mean": 0.04301303252577782, "signal/accgated_coverage_20/group_std_mean": 0.05612108111381531, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004301303531974554, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004301303531974554, "signal/accgated_coverage_25/centered_abs_mean": 0.02978185787796974, "signal/accgated_coverage_25/group_std_mean": 0.03898368887603283, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002978185843676329, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002978185843676329, "signal/accgated_coverage_5/centered_abs_mean": 0.04404938668012619, "signal/accgated_coverage_5/group_std_mean": 0.05746869742870331, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004404938966035843, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004404938966035843, "signal/accuracy_reward/centered_abs_mean": 0.091851806640625, "signal/accuracy_reward/group_std_mean": 0.12905680239200593, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459259033203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0459259033203125, "signal/advantage_abs_mean": 0.06676195412874222, "signal/advantage_pre_scale_abs_mean": 0.06676195412874222, "signal/advantage_pre_scale_std": 0.10248180478811264, "signal/advantage_std": 0.10248180478811264, "signal/brier_reward/centered_abs_mean": 0.126788030564785, "signal/brier_reward/group_std_mean": 0.16423529982566834, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012678803689777852, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012678803689777852, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013680145144462585, "signal/confidence_uniqueness_reward/group_std_mean": 0.019239641726017, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013680145610123872, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013680145610123872, "signal/format_reward/centered_abs_mean": 0.00150146484375, "signal/format_reward/group_std_mean": 0.004083108901977539, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000750732421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029450747650116684, "signal/frontier_aurc_reward/group_std_mean": 0.004820974357426166, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.681343514472246e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.681343514472246e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.008131541311740875, "signal/frontier_ece_reward/group_std_mean": 0.010789497010409831, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008131541428156198, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008131541428156198, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26855767965316774, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3468548893928528, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026855768263339998, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026855768263339998, "step": 180 }, { "calibration/aurc": 0.2993429486542598, "calibration/batch_distribution_entropy": 0.9803370668886563, "calibration/buffer_distribution_entropy": 0.9961007246434563, "calibration/confidence_entropy": 0.4802297145211024, "calibration/coverage@0%": 0.016030149217221134, "calibration/coverage@1%": 0.016030149217221134, "calibration/coverage@10%": 0.22949639187866927, "calibration/coverage@15%": 0.3237639126712329, "calibration/coverage@20%": 0.41446229818982394, "calibration/coverage@25%": 0.5055933524951076, "calibration/coverage@30%": 0.5885044642857142, "calibration/coverage@5%": 0.11103993395303327, "calibration/ece": 0.1322413696660289, "calibration/mean_confidence": 0.4870327745342595, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 644.8, "completions/max_terminated_length": 644.8, "completions/mean_length": 209.5166015625, "completions/mean_terminated_length": 209.68014831542968, "completions/min_length": 20.2, "completions/min_terminated_length": 101.6, "epoch": 0.592, "grad_norm": 0.0008513347711414099, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 622499299.0, "reward": 0.9328482508659363, "reward_std": 0.08548958897590637, "rewards/accgated_coverage_0": 0.03096109293401241, "rewards/accgated_coverage_1": 0.03096109293401241, "rewards/accgated_coverage_10": 0.030960745736956598, "rewards/accgated_coverage_15": 0.03093497231602669, "rewards/accgated_coverage_20": 0.030443714559078218, "rewards/accgated_coverage_25": 0.02200573980808258, "rewards/accgated_coverage_5": 0.03096109293401241, "rewards/accuracy_reward": 0.51962890625, "rewards/brier_reward": 0.7947103619575501, "rewards/confidence_uniqueness_reward": 0.9510635375976563, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0027746261563152074, "rewards/frontier_ece_reward": 0.0057756159454584125, "rewards/frontier_entropy_batch_reward": -0.22418674528598787, "signal/accgated_coverage_0/centered_abs_mean": 0.04694317653775215, "signal/accgated_coverage_0/group_std_mean": 0.060206232219934465, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0046943177469074724, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0046943177469074724, "signal/accgated_coverage_1/centered_abs_mean": 0.04694317653775215, "signal/accgated_coverage_1/group_std_mean": 0.060206232219934465, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0046943177469074724, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0046943177469074724, "signal/accgated_coverage_10/centered_abs_mean": 0.046942750364542006, "signal/accgated_coverage_10/group_std_mean": 0.06020570695400238, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004694275092333555, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004694275092333555, "signal/accgated_coverage_15/centered_abs_mean": 0.04689032584428787, "signal/accgated_coverage_15/group_std_mean": 0.06014012470841408, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004689032770693302, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004689032770693302, "signal/accgated_coverage_20/centered_abs_mean": 0.04522727727890015, "signal/accgated_coverage_20/group_std_mean": 0.05803100317716599, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004522727569565177, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004522727569565177, "signal/accgated_coverage_25/centered_abs_mean": 0.0300223208963871, "signal/accgated_coverage_25/group_std_mean": 0.03874893710017204, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0030022321734577417, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0030022321734577417, "signal/accgated_coverage_5/centered_abs_mean": 0.04694317653775215, "signal/accgated_coverage_5/group_std_mean": 0.060206232219934465, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0046943177469074724, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0046943177469074724, "signal/accuracy_reward/centered_abs_mean": 0.094110107421875, "signal/accuracy_reward/group_std_mean": 0.12543713301420212, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0470550537109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0470550537109375, "signal/advantage_abs_mean": 0.06528689339756966, "signal/advantage_pre_scale_abs_mean": 0.06528689339756966, "signal/advantage_pre_scale_std": 0.10016652047634125, "signal/advantage_std": 0.10016652047634125, "signal/brier_reward/centered_abs_mean": 0.12150534689426422, "signal/brier_reward/group_std_mean": 0.1580636113882065, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012150534801185131, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012150534801185131, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01382814794778824, "signal/confidence_uniqueness_reward/group_std_mean": 0.019691282883286476, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013828148366883397, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013828148366883397, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002671397430822253, "signal/frontier_aurc_reward/group_std_mean": 0.004718466103076935, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.339246795803774e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.339246795803774e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007670730352401733, "signal/frontier_ece_reward/group_std_mean": 0.010097111575305462, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007670730352401734, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007670730352401734, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28037108182907106, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3554627299308777, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02803710997104645, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02803710997104645, "step": 185 }, { "calibration/aurc": 0.2385094967467943, "calibration/batch_distribution_entropy": 0.954137694554461, "calibration/buffer_distribution_entropy": 0.9959365000462033, "calibration/confidence_entropy": 0.4544575702132888, "calibration/coverage@0%": 0.03711166829745597, "calibration/coverage@1%": 0.03711166829745597, "calibration/coverage@10%": 0.27123899217221137, "calibration/coverage@15%": 0.3876796416340509, "calibration/coverage@20%": 0.4662258439334638, "calibration/coverage@25%": 0.57524844055773, "calibration/coverage@30%": 0.6795751284246576, "calibration/coverage@5%": 0.16298694349315068, "calibration/ece": 0.09929816570177578, "calibration/mean_confidence": 0.4615513737745732, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 580.0, "completions/max_terminated_length": 580.0, "completions/mean_length": 211.66328125, "completions/mean_terminated_length": 211.7886199951172, "completions/min_length": 62.8, "completions/min_terminated_length": 107.0, "epoch": 0.608, "grad_norm": 0.0006291710305958986, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 639666219.0, "reward": 0.9342318534851074, "reward_std": 0.0812569260597229, "rewards/accgated_coverage_0": 0.03671490699052811, "rewards/accgated_coverage_1": 0.03671490699052811, "rewards/accgated_coverage_10": 0.03670356012880802, "rewards/accgated_coverage_15": 0.03666727505624294, "rewards/accgated_coverage_20": 0.03530341759324074, "rewards/accgated_coverage_25": 0.025559740513563155, "rewards/accgated_coverage_5": 0.03671490699052811, "rewards/accuracy_reward": 0.5142578125, "rewards/brier_reward": 0.8174492716789246, "rewards/confidence_uniqueness_reward": 0.9497005462646484, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.002493387321010232, "rewards/frontier_ece_reward": 0.006145768519490957, "rewards/frontier_entropy_batch_reward": -0.24340350329875945, "signal/accgated_coverage_0/centered_abs_mean": 0.04637901484966278, "signal/accgated_coverage_0/group_std_mean": 0.059489642083644864, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004637901578098536, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004637901578098536, "signal/accgated_coverage_1/centered_abs_mean": 0.04637901484966278, "signal/accgated_coverage_1/group_std_mean": 0.059489642083644864, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004637901578098536, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004637901578098536, "signal/accgated_coverage_10/centered_abs_mean": 0.046342677623033526, "signal/accgated_coverage_10/group_std_mean": 0.05944279730319977, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004634267929941416, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004634267929941416, "signal/accgated_coverage_15/centered_abs_mean": 0.04627092853188515, "signal/accgated_coverage_15/group_std_mean": 0.05935083627700806, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004627093113958836, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004627093113958836, "signal/accgated_coverage_20/centered_abs_mean": 0.04318385422229767, "signal/accgated_coverage_20/group_std_mean": 0.055419516563415525, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004318385478109121, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004318385478109121, "signal/accgated_coverage_25/centered_abs_mean": 0.028055806085467337, "signal/accgated_coverage_25/group_std_mean": 0.036068766564130786, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002805580664426088, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002805580664426088, "signal/accgated_coverage_5/centered_abs_mean": 0.04637901484966278, "signal/accgated_coverage_5/group_std_mean": 0.059489642083644864, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004637901578098536, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004637901578098536, "signal/accuracy_reward/centered_abs_mean": 0.09171142578125, "signal/accuracy_reward/group_std_mean": 0.12622790932655334, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045855712890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045855712890625, "signal/advantage_abs_mean": 0.061551207304000856, "signal/advantage_pre_scale_abs_mean": 0.061551207304000856, "signal/advantage_pre_scale_std": 0.09523071944713593, "signal/advantage_std": 0.09523071944713593, "signal/brier_reward/centered_abs_mean": 0.11499268561601639, "signal/brier_reward/group_std_mean": 0.1495683193206787, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011499268747866154, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011499268747866154, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01518423892557621, "signal/confidence_uniqueness_reward/group_std_mean": 0.020625585690140724, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001518423925153911, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001518423925153911, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145628869533537, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002332291193306446, "signal/frontier_aurc_reward/group_std_mean": 0.003978639096021652, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9153642390156165e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9153642390156165e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.007376821059733629, "signal/frontier_ece_reward/group_std_mean": 0.009607397019863129, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007376821245998144, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007376821245998144, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29461329579353335, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36972410678863527, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02946133129298687, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02946133129298687, "step": 190 }, { "calibration/aurc": 0.2620595794462538, "calibration/batch_distribution_entropy": 0.987211676069754, "calibration/buffer_distribution_entropy": 0.995760329457571, "calibration/confidence_entropy": 0.48855260610841517, "calibration/coverage@0%": 0.026960004892367904, "calibration/coverage@1%": 0.026960004892367904, "calibration/coverage@10%": 0.17078109711350292, "calibration/coverage@15%": 0.25478152519569475, "calibration/coverage@20%": 0.3919123043052838, "calibration/coverage@25%": 0.4970508194716243, "calibration/coverage@30%": 0.6029361851761252, "calibration/coverage@5%": 0.05979085127201565, "calibration/ece": 0.0907151147042236, "calibration/mean_confidence": 0.507227250756811, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 601.6, "completions/max_terminated_length": 601.6, "completions/mean_length": 211.51748046875, "completions/mean_terminated_length": 211.74548950195313, "completions/min_length": 0.0, "completions/min_terminated_length": 103.8, "epoch": 0.624, "grad_norm": 0.0008388682035729289, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 657176062.0, "reward": 0.9389870643615723, "reward_std": 0.08788901269435882, "rewards/accgated_coverage_0": 0.031854826211929324, "rewards/accgated_coverage_1": 0.031854826211929324, "rewards/accgated_coverage_10": 0.031831147894263265, "rewards/accgated_coverage_15": 0.03175428584218025, "rewards/accgated_coverage_20": 0.029630653187632562, "rewards/accgated_coverage_25": 0.02046764940023422, "rewards/accgated_coverage_5": 0.03185361251235008, "rewards/accuracy_reward": 0.5208984375, "rewards/brier_reward": 0.8073629021644593, "rewards/confidence_uniqueness_reward": 0.9527830243110657, "rewards/format_reward": 0.99892578125, "rewards/frontier_aurc_reward": -0.002760437550023198, "rewards/frontier_ece_reward": 0.004892275249585509, "rewards/frontier_entropy_batch_reward": -0.18319073915481568, "signal/accgated_coverage_0/centered_abs_mean": 0.04739323109388351, "signal/accgated_coverage_0/group_std_mean": 0.0609379231929779, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004739323165267706, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004739323165267706, "signal/accgated_coverage_1/centered_abs_mean": 0.04739323109388351, "signal/accgated_coverage_1/group_std_mean": 0.0609379231929779, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004739323165267706, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004739323165267706, "signal/accgated_coverage_10/centered_abs_mean": 0.04734518453478813, "signal/accgated_coverage_10/group_std_mean": 0.06087752133607864, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00473451865836978, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00473451865836978, "signal/accgated_coverage_15/centered_abs_mean": 0.04715350121259689, "signal/accgated_coverage_15/group_std_mean": 0.06063656434416771, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004715350363403559, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004715350363403559, "signal/accgated_coverage_20/centered_abs_mean": 0.04258274808526039, "signal/accgated_coverage_20/group_std_mean": 0.054887625575065616, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0042582748923450705, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0042582748923450705, "signal/accgated_coverage_25/centered_abs_mean": 0.02506561353802681, "signal/accgated_coverage_25/group_std_mean": 0.03276568688452244, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0025065614376217128, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0025065614376217128, "signal/accgated_coverage_5/centered_abs_mean": 0.04739024117588997, "signal/accgated_coverage_5/group_std_mean": 0.060934138298034665, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0047390243038535115, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0047390243038535115, "signal/accuracy_reward/centered_abs_mean": 0.10511474609375, "signal/accuracy_reward/group_std_mean": 0.13674385845661163, "signal/accuracy_reward/group_zero_std_frac": 0.615625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052557373046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052557373046875, "signal/advantage_abs_mean": 0.06808174103498459, "signal/advantage_pre_scale_abs_mean": 0.06808174103498459, "signal/advantage_pre_scale_std": 0.10456641763448715, "signal/advantage_std": 0.10456641763448715, "signal/brier_reward/centered_abs_mean": 0.12090775668621064, "signal/brier_reward/group_std_mean": 0.15719686448574066, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012090775556862355, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012090775556862355, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012956660613417626, "signal/confidence_uniqueness_reward/group_std_mean": 0.01948312222957611, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012956660706549884, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012956660706549884, "signal/format_reward/centered_abs_mean": 0.002081298828125, "signal/format_reward/group_std_mean": 0.006076698796823621, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002556943567469716, "signal/frontier_aurc_reward/group_std_mean": 0.004279503040015698, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.196179386577569e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.196179386577569e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.006517344154417515, "signal/frontier_ece_reward/group_std_mean": 0.008778749220073222, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000651734450366348, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000651734450366348, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25290383100509645, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3234200954437256, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025290383026003838, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025290383026003838, "step": 195 }, { "calibration/aurc": 0.28109282115966183, "calibration/batch_distribution_entropy": 0.9724146586954063, "calibration/buffer_distribution_entropy": 0.9958038898068187, "calibration/confidence_entropy": 0.489667227589128, "calibration/coverage@0%": 0.02269141389432485, "calibration/coverage@1%": 0.02269141389432485, "calibration/coverage@10%": 0.2932477678571429, "calibration/coverage@15%": 0.3534368884540117, "calibration/coverage@20%": 0.4058073935909981, "calibration/coverage@25%": 0.5222603550715305, "calibration/coverage@30%": 0.6137110142436148, "calibration/coverage@5%": 0.11897321428571428, "calibration/ece": 0.16706667744881706, "calibration/mean_confidence": 0.5486211101060794, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00166015625, "completions/max_length": 734.4, "completions/max_terminated_length": 734.4, "completions/mean_length": 212.99970703125, "completions/mean_terminated_length": 213.35613098144532, "completions/min_length": 43.2, "completions/min_terminated_length": 104.0, "epoch": 0.64, "grad_norm": 0.0009371961350552738, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 674699867.0, "reward": 0.9512609839439392, "reward_std": 0.08684130012989044, "rewards/accgated_coverage_0": 0.02471376843750477, "rewards/accgated_coverage_1": 0.02471376843750477, "rewards/accgated_coverage_10": 0.024693097919225693, "rewards/accgated_coverage_15": 0.024671100080013275, "rewards/accgated_coverage_20": 0.022685779072344304, "rewards/accgated_coverage_25": 0.016758498549461365, "rewards/accgated_coverage_5": 0.024713458120822908, "rewards/accuracy_reward": 0.56240234375, "rewards/brier_reward": 0.8025913000106811, "rewards/confidence_uniqueness_reward": 0.9504665613174439, "rewards/format_reward": 0.99833984375, "rewards/frontier_aurc_reward": -0.002733389986678958, "rewards/frontier_ece_reward": 0.004934624442830682, "rewards/frontier_entropy_batch_reward": -0.21170130372047424, "signal/accgated_coverage_0/centered_abs_mean": 0.04717938750982285, "signal/accgated_coverage_0/group_std_mean": 0.06157350316643715, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004717938927933573, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004717938927933573, "signal/accgated_coverage_1/centered_abs_mean": 0.04717938750982285, "signal/accgated_coverage_1/group_std_mean": 0.06157350316643715, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004717938927933573, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004717938927933573, "signal/accgated_coverage_10/centered_abs_mean": 0.047129976004362105, "signal/accgated_coverage_10/group_std_mean": 0.06151040866971016, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004712997563183307, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004712997563183307, "signal/accgated_coverage_15/centered_abs_mean": 0.04697373732924461, "signal/accgated_coverage_15/group_std_mean": 0.06130784824490547, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004697373416274786, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004697373416274786, "signal/accgated_coverage_20/centered_abs_mean": 0.041337736323475836, "signal/accgated_coverage_20/group_std_mean": 0.05399098321795463, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004133773688226938, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004133773688226938, "signal/accgated_coverage_25/centered_abs_mean": 0.023771359771490096, "signal/accgated_coverage_25/group_std_mean": 0.03101888746023178, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0023771360516548156, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0023771360516548156, "signal/accgated_coverage_5/centered_abs_mean": 0.04717613756656647, "signal/accgated_coverage_5/group_std_mean": 0.06156923472881317, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00471761361695826, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00471761361695826, "signal/accuracy_reward/centered_abs_mean": 0.087603759765625, "signal/accuracy_reward/group_std_mean": 0.12051970660686492, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0438018798828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0438018798828125, "signal/advantage_abs_mean": 0.06671606823801994, "signal/advantage_pre_scale_abs_mean": 0.06671606823801994, "signal/advantage_pre_scale_std": 0.10374155789613723, "signal/advantage_std": 0.10374155789613723, "signal/brier_reward/centered_abs_mean": 0.11449979990720749, "signal/brier_reward/group_std_mean": 0.14780859649181366, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011449980735778808, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011449980735778808, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014644245617091656, "signal/confidence_uniqueness_reward/group_std_mean": 0.020360873267054557, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014644246315583588, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014644246315583588, "signal/format_reward/centered_abs_mean": 0.002838134765625, "signal/format_reward/group_std_mean": 0.005614831438288092, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014190673828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014190673828125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002792434743605554, "signal/frontier_aurc_reward/group_std_mean": 0.004751656157895923, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.490543349471409e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.490543349471409e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.006552870571613312, "signal/frontier_ece_reward/group_std_mean": 0.008730523101985455, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006552870734594762, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006552870734594762, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2739957094192505, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3482341289520264, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02739957198500633, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02739957198500633, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.4598180669050629, "eval_calibration/batch_distribution_entropy": 0.9273818230064164, "eval_calibration/buffer_distribution_entropy": 0.9958763092805967, "eval_calibration/confidence_entropy": 0.493588963586702, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.09375, "eval_calibration/coverage@20%": 0.15625, "eval_calibration/coverage@25%": 0.234375, "eval_calibration/coverage@30%": 0.2421875, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.21956684336417714, "eval_calibration/mean_confidence": 0.4808397541493321, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 668.0, "eval_completions/max_terminated_length": 668.0, "eval_completions/mean_length": 214.29525756835938, "eval_completions/mean_terminated_length": 214.29525756835938, "eval_completions/min_length": 119.75, "eval_completions/min_terminated_length": 119.75, "eval_loss": 0.0, "eval_num_tokens": 674699867.0, "eval_reward": 0.798967257142067, "eval_reward_std": 0.2201063111424446, "eval_rewards/accgated_coverage_0": 0.030158083885908127, "eval_rewards/accgated_coverage_1": 0.030158083885908127, "eval_rewards/accgated_coverage_10": 0.030138885602355003, "eval_rewards/accgated_coverage_15": 0.030080335214734077, "eval_rewards/accgated_coverage_20": 0.027232197113335133, "eval_rewards/accgated_coverage_25": 0.015601862454786897, "eval_rewards/accgated_coverage_5": 0.03015622543171048, "eval_rewards/accuracy_reward": 0.419921875, "eval_rewards/brier_reward": 0.791017934679985, "eval_rewards/confidence_uniqueness_reward": 0.902099609375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0034560004714876413, "eval_rewards/frontier_ece_reward": 0.003851950401440263, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 28.9615, "eval_samples_per_second": 17.264, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.06193764880299568, "eval_signal/accgated_coverage_0/group_std_mean": 0.07639571651816368, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0061937650898471475, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0061937650898471475, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.06193764880299568, "eval_signal/accgated_coverage_1/group_std_mean": 0.07639571651816368, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0061937650898471475, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0061937650898471475, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.06187119986861944, "eval_signal/accgated_coverage_10/group_std_mean": 0.07631925866007805, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006187119870446622, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006187119870446622, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.061731474474072456, "eval_signal/accgated_coverage_15/group_std_mean": 0.0761583186686039, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006173147703520954, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006173147703520954, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.05488021858036518, "eval_signal/accgated_coverage_20/group_std_mean": 0.06819487921893597, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.005488021764904261, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.005488021764904261, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.027537908405065536, "eval_signal/accgated_coverage_25/group_std_mean": 0.03532271645963192, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002753790933638811, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002753790933638811, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.06193420384079218, "eval_signal/accgated_coverage_5/group_std_mean": 0.07639174908399582, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0061934206169098616, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0061934206169098616, "eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375, "eval_signal/accuracy_reward/group_std_mean": 0.493278868496418, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875, "eval_signal/advantage_abs_mean": 0.20314034819602966, "eval_signal/advantage_pre_scale_abs_mean": 0.20314034819602966, "eval_signal/advantage_pre_scale_std": 0.2178102284669876, "eval_signal/advantage_std": 0.2178102284669876, "eval_signal/brier_reward/centered_abs_mean": 0.1911204643547535, "eval_signal/brier_reward/group_std_mean": 0.2394135519862175, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01911204680800438, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01911204680800438, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0389862060546875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04497408773750067, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038986208382993937, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038986208382993937, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0041074592736549675, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007324753561988473, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.134324328537332e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.134324328537332e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.007128268596716225, "eval_signal/frontier_ece_reward/group_std_mean": 0.009997925953939557, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000712826891685836, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000712826891685836, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.138, "step": 200 }, { "calibration/aurc": 0.4425023850966543, "calibration/batch_distribution_entropy": 0.973929548474511, "calibration/buffer_distribution_entropy": 0.9959812632830186, "calibration/confidence_entropy": 0.517053029578119, "calibration/coverage@0%": 0.003520211594911937, "calibration/coverage@1%": 0.003520211594911937, "calibration/coverage@10%": 0.005868548189823875, "calibration/coverage@15%": 0.005868548189823875, "calibration/coverage@20%": 0.014860567514677103, "calibration/coverage@25%": 0.1172073752446184, "calibration/coverage@30%": 0.20900577910958903, "calibration/coverage@5%": 0.003520211594911937, "calibration/ece": 0.11917904343810719, "calibration/mean_confidence": 0.4836532776329685, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 679.8, "completions/max_terminated_length": 679.8, "completions/mean_length": 213.4392578125, "completions/mean_terminated_length": 213.5014862060547, "completions/min_length": 43.8, "completions/min_terminated_length": 101.2, "epoch": 0.656, "grad_norm": 0.0007970785372890532, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 691742029.0, "reward": 0.9179041743278503, "reward_std": 0.0921265184879303, "rewards/accgated_coverage_0": 0.025301176682114603, "rewards/accgated_coverage_1": 0.025301176682114603, "rewards/accgated_coverage_10": 0.025292183458805084, "rewards/accgated_coverage_15": 0.025229696184396744, "rewards/accgated_coverage_20": 0.02275848053395748, "rewards/accgated_coverage_25": 0.014438183046877384, "rewards/accgated_coverage_5": 0.025299759954214095, "rewards/accuracy_reward": 0.4931640625, "rewards/brier_reward": 0.7853361010551453, "rewards/confidence_uniqueness_reward": 0.9532147526741028, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003199864272028208, "rewards/frontier_ece_reward": 0.0035779656376689673, "rewards/frontier_entropy_batch_reward": -0.19066329896450043, "signal/accgated_coverage_0/centered_abs_mean": 0.040899327397346495, "signal/accgated_coverage_0/group_std_mean": 0.05284639969468117, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0040899327024817465, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0040899327024817465, "signal/accgated_coverage_1/centered_abs_mean": 0.040899327397346495, "signal/accgated_coverage_1/group_std_mean": 0.05284639969468117, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0040899327024817465, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0040899327024817465, "signal/accgated_coverage_10/centered_abs_mean": 0.04087764136493206, "signal/accgated_coverage_10/group_std_mean": 0.05281898975372314, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0040877643041312695, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0040877643041312695, "signal/accgated_coverage_15/centered_abs_mean": 0.04076016694307327, "signal/accgated_coverage_15/group_std_mean": 0.052670329064130786, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004076016694307327, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004076016694307327, "signal/accgated_coverage_20/centered_abs_mean": 0.036109994351863864, "signal/accgated_coverage_20/group_std_mean": 0.046791880205273625, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0036109994165599347, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0036109994165599347, "signal/accgated_coverage_25/centered_abs_mean": 0.01965227909386158, "signal/accgated_coverage_25/group_std_mean": 0.025820601359009744, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0019652278628200293, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0019652278628200293, "signal/accgated_coverage_5/centered_abs_mean": 0.04089687131345272, "signal/accgated_coverage_5/group_std_mean": 0.052843216061592105, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004089687252417207, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004089687252417207, "signal/accuracy_reward/centered_abs_mean": 0.10579833984375, "signal/accuracy_reward/group_std_mean": 0.14109778702259063, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052899169921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052899169921875, "signal/advantage_abs_mean": 0.07221986800432205, "signal/advantage_pre_scale_abs_mean": 0.07221986800432205, "signal/advantage_pre_scale_std": 0.10920778065919876, "signal/advantage_std": 0.10920778065919876, "signal/brier_reward/centered_abs_mean": 0.12306497395038604, "signal/brier_reward/group_std_mean": 0.15778571367263794, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012306497804820538, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012306497804820538, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012087763845920562, "signal/confidence_uniqueness_reward/group_std_mean": 0.01594572402536869, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012087764218449593, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012087764218449593, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028173317667096855, "signal/frontier_aurc_reward/group_std_mean": 0.004808265902101994, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.521664693835191e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.521664693835191e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0057150271721184255, "signal/frontier_ece_reward/group_std_mean": 0.007791910413652658, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005715027218684554, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005715027218684554, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.264396995306015, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34044753313064574, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02643970064818859, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02643970064818859, "step": 205 }, { "calibration/aurc": 0.2870710130201714, "calibration/batch_distribution_entropy": 0.9753064420948899, "calibration/buffer_distribution_entropy": 0.9963448794536054, "calibration/confidence_entropy": 0.4887674307638491, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.15625, "calibration/coverage@15%": 0.233984375, "calibration/coverage@20%": 0.29375, "calibration/coverage@25%": 0.409375, "calibration/coverage@30%": 0.518359375, "calibration/coverage@5%": 0.080859375, "calibration/ece": 0.1298590207189613, "calibration/mean_confidence": 0.49373118934714694, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 768.4, "completions/max_terminated_length": 768.4, "completions/mean_length": 210.81796875, "completions/mean_terminated_length": 210.88012084960937, "completions/min_length": 39.2, "completions/min_terminated_length": 99.8, "epoch": 0.672, "grad_norm": 0.0009423012379556894, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 708814245.0, "reward": 0.9310911059379577, "reward_std": 0.08257418423891068, "rewards/accgated_coverage_0": 0.033774099126458165, "rewards/accgated_coverage_1": 0.033774099126458165, "rewards/accgated_coverage_10": 0.03377165608108044, "rewards/accgated_coverage_15": 0.03372667729854584, "rewards/accgated_coverage_20": 0.030913470312952995, "rewards/accgated_coverage_25": 0.01957174502313137, "rewards/accgated_coverage_5": 0.03377415724098683, "rewards/accuracy_reward": 0.51240234375, "rewards/brier_reward": 0.7978991866111755, "rewards/confidence_uniqueness_reward": 0.950904655456543, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.00275814956985414, "rewards/frontier_ece_reward": 0.004061966063454747, "rewards/frontier_entropy_batch_reward": -0.2214625895023346, "signal/accgated_coverage_0/centered_abs_mean": 0.047636684775352475, "signal/accgated_coverage_0/group_std_mean": 0.06049715206027031, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004763668589293957, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004763668589293957, "signal/accgated_coverage_1/centered_abs_mean": 0.047636684775352475, "signal/accgated_coverage_1/group_std_mean": 0.06049715206027031, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004763668589293957, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004763668589293957, "signal/accgated_coverage_10/centered_abs_mean": 0.047632255405187604, "signal/accgated_coverage_10/group_std_mean": 0.060491522401571275, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004763225605711341, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004763225605711341, "signal/accgated_coverage_15/centered_abs_mean": 0.04751182347536087, "signal/accgated_coverage_15/group_std_mean": 0.06034188643097878, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004751182394102216, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004751182394102216, "signal/accgated_coverage_20/centered_abs_mean": 0.04096822217106819, "signal/accgated_coverage_20/group_std_mean": 0.052256053686141966, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00409682234749198, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00409682234749198, "signal/accgated_coverage_25/centered_abs_mean": 0.022413133084774016, "signal/accgated_coverage_25/group_std_mean": 0.028731198236346244, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0022413132712244986, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0022413132712244986, "signal/accgated_coverage_5/centered_abs_mean": 0.04763597846031189, "signal/accgated_coverage_5/group_std_mean": 0.06049617603421211, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004763598088175058, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004763598088175058, "signal/accuracy_reward/centered_abs_mean": 0.097918701171875, "signal/accuracy_reward/group_std_mean": 0.13083914667367935, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0489593505859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0489593505859375, "signal/advantage_abs_mean": 0.06339110806584358, "signal/advantage_pre_scale_abs_mean": 0.06339110806584358, "signal/advantage_pre_scale_std": 0.09667609930038452, "signal/advantage_std": 0.09667609930038452, "signal/brier_reward/centered_abs_mean": 0.12451072931289672, "signal/brier_reward/group_std_mean": 0.15938679575920106, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012451073713600635, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012451073713600635, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014077316224575042, "signal/confidence_uniqueness_reward/group_std_mean": 0.018521204963326453, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014077316503971816, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014077316503971816, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002460779994726181, "signal/frontier_aurc_reward/group_std_mean": 0.004416647460311651, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.075975109823048e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.075975109823048e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.005778457596898079, "signal/frontier_ece_reward/group_std_mean": 0.007743468787521124, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005778457503765822, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005778457503765822, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27691051959991453, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35814193487167356, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027691051363945007, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027691051363945007, "step": 210 }, { "calibration/aurc": 0.33490676273829906, "calibration/batch_distribution_entropy": 0.9797438124139027, "calibration/buffer_distribution_entropy": 0.9964835959383807, "calibration/confidence_entropy": 0.49407399575377536, "calibration/coverage@0%": 0.011328125, "calibration/coverage@1%": 0.011328125, "calibration/coverage@10%": 0.120703125, "calibration/coverage@15%": 0.2234375, "calibration/coverage@20%": 0.312109375, "calibration/coverage@25%": 0.3828125, "calibration/coverage@30%": 0.563671875, "calibration/coverage@5%": 0.043359375, "calibration/ece": 0.12613568302739456, "calibration/mean_confidence": 0.4738847542957731, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 830.4, "completions/max_terminated_length": 830.4, "completions/mean_length": 212.3232421875, "completions/mean_terminated_length": 212.44719848632812, "completions/min_length": 0.0, "completions/min_terminated_length": 105.4, "epoch": 0.688, "grad_norm": 0.0009251743904314935, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 725942355.0, "reward": 0.9335816979408265, "reward_std": 0.08509753495454789, "rewards/accgated_coverage_0": 0.026787951961159705, "rewards/accgated_coverage_1": 0.026787951961159705, "rewards/accgated_coverage_10": 0.026786612719297408, "rewards/accgated_coverage_15": 0.026709262281656265, "rewards/accgated_coverage_20": 0.02241134848445654, "rewards/accgated_coverage_25": 0.01569197904318571, "rewards/accgated_coverage_5": 0.026787951961159705, "rewards/accuracy_reward": 0.5232421875, "rewards/brier_reward": 0.7955889105796814, "rewards/confidence_uniqueness_reward": 0.9520182609558105, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.002821239922195673, "rewards/frontier_ece_reward": 0.0037543469108641146, "rewards/frontier_entropy_batch_reward": -0.20043619871139526, "signal/accgated_coverage_0/centered_abs_mean": 0.052465547993779185, "signal/accgated_coverage_0/group_std_mean": 0.066448612511158, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005246554780751467, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005246554780751467, "signal/accgated_coverage_1/centered_abs_mean": 0.052465547993779185, "signal/accgated_coverage_1/group_std_mean": 0.066448612511158, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005246554780751467, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005246554780751467, "signal/accgated_coverage_10/centered_abs_mean": 0.052459338679909706, "signal/accgated_coverage_10/group_std_mean": 0.06644077599048615, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00524593386799097, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00524593386799097, "signal/accgated_coverage_15/centered_abs_mean": 0.05215623266994953, "signal/accgated_coverage_15/group_std_mean": 0.06606373339891433, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005215623416006565, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005215623416006565, "signal/accgated_coverage_20/centered_abs_mean": 0.04323282837867737, "signal/accgated_coverage_20/group_std_mean": 0.05498237237334251, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0043232828378677365, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0043232828378677365, "signal/accgated_coverage_25/centered_abs_mean": 0.023756309226155282, "signal/accgated_coverage_25/group_std_mean": 0.03036804832518101, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0023756310110911727, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0023756310110911727, "signal/accgated_coverage_5/centered_abs_mean": 0.052465547993779185, "signal/accgated_coverage_5/group_std_mean": 0.066448612511158, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005246554780751467, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005246554780751467, "signal/accuracy_reward/centered_abs_mean": 0.1098876953125, "signal/accuracy_reward/group_std_mean": 0.14553710520267488, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05494384765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05494384765625, "signal/advantage_abs_mean": 0.0653716504573822, "signal/advantage_pre_scale_abs_mean": 0.0653716504573822, "signal/advantage_pre_scale_std": 0.10079272240400314, "signal/advantage_std": 0.10079272240400314, "signal/brier_reward/centered_abs_mean": 0.12485045194625854, "signal/brier_reward/group_std_mean": 0.16001889407634734, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01248504538089037, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01248504538089037, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013444668985903263, "signal/confidence_uniqueness_reward/group_std_mean": 0.01862073801457882, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013444669311866164, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013444669311866164, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002512581180781126, "signal/frontier_aurc_reward/group_std_mean": 0.004061655001714825, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1407264032168314e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1407264032168314e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.005848802160471678, "signal/frontier_ece_reward/group_std_mean": 0.007683264184743166, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005848802160471678, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005848802160471678, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26048979461193084, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3358060121536255, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02604898065328598, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02604898065328598, "step": 215 }, { "calibration/aurc": 0.28453259958201943, "calibration/batch_distribution_entropy": 0.9729883365829831, "calibration/buffer_distribution_entropy": 0.996587227545891, "calibration/confidence_entropy": 0.4705053015114881, "calibration/coverage@0%": 0.009375, "calibration/coverage@1%": 0.009375, "calibration/coverage@10%": 0.012890625, "calibration/coverage@15%": 0.11836549045988258, "calibration/coverage@20%": 0.2461013943248532, "calibration/coverage@25%": 0.48790667808219174, "calibration/coverage@30%": 0.6070847602739726, "calibration/coverage@5%": 0.009375, "calibration/ece": 0.1117538420563358, "calibration/mean_confidence": 0.5099229643395624, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 730.8, "completions/max_terminated_length": 730.8, "completions/mean_length": 209.2603515625, "completions/mean_terminated_length": 209.3418701171875, "completions/min_length": 16.2, "completions/min_terminated_length": 101.4, "epoch": 0.704, "grad_norm": 0.001095001120120287, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 742951325.0, "reward": 0.9403750419616699, "reward_std": 0.08310929387807846, "rewards/accgated_coverage_0": 0.03328470177948475, "rewards/accgated_coverage_1": 0.03328470177948475, "rewards/accgated_coverage_10": 0.03328111469745636, "rewards/accgated_coverage_15": 0.03311664015054703, "rewards/accgated_coverage_20": 0.02838711105287075, "rewards/accgated_coverage_25": 0.018937293067574502, "rewards/accgated_coverage_5": 0.03328470177948475, "rewards/accuracy_reward": 0.52744140625, "rewards/brier_reward": 0.8056316375732422, "rewards/confidence_uniqueness_reward": 0.9524389386177063, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0029492788482457398, "rewards/frontier_ece_reward": 0.0040812592953443525, "rewards/frontier_entropy_batch_reward": -0.20637467503547668, "signal/accgated_coverage_0/centered_abs_mean": 0.045438441634178164, "signal/accgated_coverage_0/group_std_mean": 0.05873422995209694, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00454384433105588, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00454384433105588, "signal/accgated_coverage_1/centered_abs_mean": 0.045438441634178164, "signal/accgated_coverage_1/group_std_mean": 0.05873422995209694, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00454384433105588, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00454384433105588, "signal/accgated_coverage_10/centered_abs_mean": 0.04543309956789017, "signal/accgated_coverage_10/group_std_mean": 0.058727345615625384, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004543309938162566, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004543309938162566, "signal/accgated_coverage_15/centered_abs_mean": 0.045103757083415984, "signal/accgated_coverage_15/group_std_mean": 0.05830207094550133, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0045103756710886955, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0045103756710886955, "signal/accgated_coverage_20/centered_abs_mean": 0.03521736077964306, "signal/accgated_coverage_20/group_std_mean": 0.045604909956455233, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0035217361990362408, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0035217361990362408, "signal/accgated_coverage_25/centered_abs_mean": 0.020598072186112405, "signal/accgated_coverage_25/group_std_mean": 0.026666931807994843, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002059807279147208, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002059807279147208, "signal/accgated_coverage_5/centered_abs_mean": 0.045438441634178164, "signal/accgated_coverage_5/group_std_mean": 0.05873422995209694, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00454384433105588, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00454384433105588, "signal/accuracy_reward/centered_abs_mean": 0.086187744140625, "signal/accuracy_reward/group_std_mean": 0.11752658784389496, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0430938720703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0430938720703125, "signal/advantage_abs_mean": 0.06388919427990913, "signal/advantage_pre_scale_abs_mean": 0.06388919427990913, "signal/advantage_pre_scale_std": 0.09822125136852264, "signal/advantage_std": 0.09822125136852264, "signal/brier_reward/centered_abs_mean": 0.12010153383016586, "signal/brier_reward/group_std_mean": 0.15592622756958008, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012010153383016586, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012010153383016586, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013271934166550637, "signal/confidence_uniqueness_reward/group_std_mean": 0.018078647926449774, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013271935051307083, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013271935051307083, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029013346415013076, "signal/frontier_aurc_reward/group_std_mean": 0.004842393286526203, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.626668330980465e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.626668330980465e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.005852928943932056, "signal/frontier_ece_reward/group_std_mean": 0.007757721655070782, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005852928617969156, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005852928617969156, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2692374408245087, "signal/frontier_entropy_batch_reward/group_std_mean": 0.345594185590744, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02692374512553215, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02692374512553215, "step": 220 }, { "calibration/aurc": 0.24788575025993992, "calibration/batch_distribution_entropy": 0.9844987021359772, "calibration/buffer_distribution_entropy": 0.9965920491455448, "calibration/confidence_entropy": 0.47441804644926594, "calibration/coverage@0%": 0.027768010029354206, "calibration/coverage@1%": 0.027768010029354206, "calibration/coverage@10%": 0.2899714102250489, "calibration/coverage@15%": 0.386472602739726, "calibration/coverage@20%": 0.4865054733365949, "calibration/coverage@25%": 0.5463062622309198, "calibration/coverage@30%": 0.6338513637475538, "calibration/coverage@5%": 0.14147352005870842, "calibration/ece": 0.12980114962746656, "calibration/mean_confidence": 0.5243642720011021, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 642.8, "completions/max_terminated_length": 642.8, "completions/mean_length": 207.5609375, "completions/mean_terminated_length": 207.58056030273437, "completions/min_length": 80.2, "completions/min_terminated_length": 100.8, "epoch": 0.72, "grad_norm": 0.0009799289982765913, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 760086605.0, "reward": 0.9555520176887512, "reward_std": 0.08307535648345947, "rewards/accgated_coverage_0": 0.03051256462931633, "rewards/accgated_coverage_1": 0.03051256462931633, "rewards/accgated_coverage_10": 0.03051130548119545, "rewards/accgated_coverage_15": 0.030381328240036965, "rewards/accgated_coverage_20": 0.02490551434457302, "rewards/accgated_coverage_25": 0.01838742271065712, "rewards/accgated_coverage_5": 0.03051256462931633, "rewards/accuracy_reward": 0.55830078125, "rewards/brier_reward": 0.8164411544799804, "rewards/confidence_uniqueness_reward": 0.9524749159812927, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002484840899705887, "rewards/frontier_ece_reward": 0.004225656203925609, "rewards/frontier_entropy_batch_reward": -0.20405004620552064, "signal/accgated_coverage_0/centered_abs_mean": 0.04963188543915749, "signal/accgated_coverage_0/group_std_mean": 0.06388133987784386, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004963188711553812, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004963188711553812, "signal/accgated_coverage_1/centered_abs_mean": 0.04963188543915749, "signal/accgated_coverage_1/group_std_mean": 0.06388133987784386, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004963188711553812, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004963188711553812, "signal/accgated_coverage_10/centered_abs_mean": 0.049627379328012464, "signal/accgated_coverage_10/group_std_mean": 0.06387575715780258, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004962737904861569, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004962737904861569, "signal/accgated_coverage_15/centered_abs_mean": 0.049022985994815825, "signal/accgated_coverage_15/group_std_mean": 0.06312333792448044, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004902298748493195, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004902298748493195, "signal/accgated_coverage_20/centered_abs_mean": 0.03493177182972431, "signal/accgated_coverage_20/group_std_mean": 0.045493639260530475, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0034931772388517857, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0034931772388517857, "signal/accgated_coverage_25/centered_abs_mean": 0.02032623775303364, "signal/accgated_coverage_25/group_std_mean": 0.02634006626904011, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00203262388240546, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00203262388240546, "signal/accgated_coverage_5/centered_abs_mean": 0.04963188543915749, "signal/accgated_coverage_5/group_std_mean": 0.06388133987784386, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004963188711553812, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004963188711553812, "signal/accuracy_reward/centered_abs_mean": 0.100262451171875, "signal/accuracy_reward/group_std_mean": 0.13359598368406295, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0501312255859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0501312255859375, "signal/advantage_abs_mean": 0.06428440287709236, "signal/advantage_pre_scale_abs_mean": 0.06428440287709236, "signal/advantage_pre_scale_std": 0.09776978790760041, "signal/advantage_std": 0.09776978790760041, "signal/brier_reward/centered_abs_mean": 0.10959625095129014, "signal/brier_reward/group_std_mean": 0.1432813137769699, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010959625616669655, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010959625616669655, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012161934934556484, "signal/confidence_uniqueness_reward/group_std_mean": 0.015593766607344151, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012161935213953257, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012161935213953257, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024530492490157487, "signal/frontier_aurc_reward/group_std_mean": 0.004177492624148726, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.066311764996499e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.066311764996499e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.005394628457725048, "signal/frontier_ece_reward/group_std_mean": 0.007084634527564049, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005394628620706499, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005394628620706499, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.263569763302803, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33859267830848694, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026356976479291916, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026356976479291916, "step": 225 }, { "calibration/aurc": 0.27484888127610524, "calibration/batch_distribution_entropy": 0.9747183047227347, "calibration/buffer_distribution_entropy": 0.996566131942201, "calibration/confidence_entropy": 0.47252951594941683, "calibration/coverage@0%": 0.029343505381604695, "calibration/coverage@1%": 0.029343505381604695, "calibration/coverage@10%": 0.13293098703522505, "calibration/coverage@15%": 0.18258087695694716, "calibration/coverage@20%": 0.39551278131115464, "calibration/coverage@25%": 0.5018078828277887, "calibration/coverage@30%": 0.633496667074364, "calibration/coverage@5%": 0.09656616927592955, "calibration/ece": 0.12579707874867294, "calibration/mean_confidence": 0.5529484226759458, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 582.8, "completions/max_terminated_length": 582.8, "completions/mean_length": 206.87109375, "completions/mean_terminated_length": 206.9926025390625, "completions/min_length": 0.0, "completions/min_terminated_length": 96.6, "epoch": 0.736, "grad_norm": 0.00072222959715873, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 777144549.0, "reward": 0.9504972577095032, "reward_std": 0.08325774669647217, "rewards/accgated_coverage_0": 0.0285327211022377, "rewards/accgated_coverage_1": 0.0285327211022377, "rewards/accgated_coverage_10": 0.02853233776986599, "rewards/accgated_coverage_15": 0.02839807290583849, "rewards/accgated_coverage_20": 0.0235455721616745, "rewards/accgated_coverage_25": 0.01733865328133106, "rewards/accgated_coverage_5": 0.0285327211022377, "rewards/accuracy_reward": 0.554296875, "rewards/brier_reward": 0.8032742261886596, "rewards/confidence_uniqueness_reward": 0.9524643421173096, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0029948077630251647, "rewards/frontier_ece_reward": 0.003622399689629674, "rewards/frontier_entropy_batch_reward": -0.20598133206367492, "signal/accgated_coverage_0/centered_abs_mean": 0.05004717260599136, "signal/accgated_coverage_0/group_std_mean": 0.06393795162439346, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005004717502743006, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005004717502743006, "signal/accgated_coverage_1/centered_abs_mean": 0.05004717260599136, "signal/accgated_coverage_1/group_std_mean": 0.06393795162439346, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005004717502743006, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005004717502743006, "signal/accgated_coverage_10/centered_abs_mean": 0.050046234577894214, "signal/accgated_coverage_10/group_std_mean": 0.06393673494458199, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005004623578861356, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005004623578861356, "signal/accgated_coverage_15/centered_abs_mean": 0.049556747823953626, "signal/accgated_coverage_15/group_std_mean": 0.0633281297981739, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004955675080418586, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004955675080418586, "signal/accgated_coverage_20/centered_abs_mean": 0.03371400721371174, "signal/accgated_coverage_20/group_std_mean": 0.04351087808609009, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0033714008051902054, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0033714008051902054, "signal/accgated_coverage_25/centered_abs_mean": 0.0200997706502676, "signal/accgated_coverage_25/group_std_mean": 0.02584904506802559, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.002009977027773857, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.002009977027773857, "signal/accgated_coverage_5/centered_abs_mean": 0.05004717260599136, "signal/accgated_coverage_5/group_std_mean": 0.06393795162439346, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005004717502743006, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005004717502743006, "signal/accuracy_reward/centered_abs_mean": 0.08973388671875, "signal/accuracy_reward/group_std_mean": 0.1211128681898117, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044866943359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044866943359375, "signal/advantage_abs_mean": 0.06399662345647812, "signal/advantage_pre_scale_abs_mean": 0.06399662345647812, "signal/advantage_pre_scale_std": 0.09707808792591095, "signal/advantage_std": 0.09707808792591095, "signal/brier_reward/centered_abs_mean": 0.11655503362417222, "signal/brier_reward/group_std_mean": 0.15057767629623414, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011655503325164318, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011655503325164318, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012995177507400512, "signal/confidence_uniqueness_reward/group_std_mean": 0.018023890629410743, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012995177647098898, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012995177647098898, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002793784369714558, "signal/frontier_aurc_reward/group_std_mean": 0.004505346901714802, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.492230316624045e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.492230316624045e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.005415247846394777, "signal/frontier_ece_reward/group_std_mean": 0.007059116475284099, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000541524775326252, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000541524775326252, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27687177062034607, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35106891989707945, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027687177062034607, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027687177062034607, "step": 230 }, { "calibration/aurc": 0.2660196836183052, "calibration/batch_distribution_entropy": 0.9692057287077087, "calibration/buffer_distribution_entropy": 0.9965957295174226, "calibration/confidence_entropy": 0.47005762391068373, "calibration/coverage@0%": 0.044140625, "calibration/coverage@1%": 0.044140625, "calibration/coverage@10%": 0.19375, "calibration/coverage@15%": 0.26875, "calibration/coverage@20%": 0.378515625, "calibration/coverage@25%": 0.50078125, "calibration/coverage@30%": 0.623046875, "calibration/coverage@5%": 0.087109375, "calibration/ece": 0.12177968601544795, "calibration/mean_confidence": 0.46668987727826927, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 650.8, "completions/max_terminated_length": 650.8, "completions/mean_length": 206.2876953125, "completions/mean_terminated_length": 206.3489959716797, "completions/min_length": 41.4, "completions/min_terminated_length": 104.2, "epoch": 0.752, "grad_norm": 0.0007596755749545991, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 794484135.0, "reward": 0.9476239919662476, "reward_std": 0.08365670591592789, "rewards/accgated_coverage_0": 0.02502902615815401, "rewards/accgated_coverage_1": 0.02502902615815401, "rewards/accgated_coverage_10": 0.02502902615815401, "rewards/accgated_coverage_15": 0.02488698102533817, "rewards/accgated_coverage_20": 0.018017900735139848, "rewards/accgated_coverage_25": 0.014435861306264997, "rewards/accgated_coverage_5": 0.02502902615815401, "rewards/accuracy_reward": 0.55205078125, "rewards/brier_reward": 0.8026696920394898, "rewards/confidence_uniqueness_reward": 0.9525475263595581, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.00277663916349411, "rewards/frontier_ece_reward": 0.0033977875020354984, "rewards/frontier_entropy_batch_reward": -0.19778555929660796, "signal/accgated_coverage_0/centered_abs_mean": 0.048104815930128095, "signal/accgated_coverage_0/group_std_mean": 0.06266987174749375, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00481048165820539, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00481048165820539, "signal/accgated_coverage_1/centered_abs_mean": 0.048104815930128095, "signal/accgated_coverage_1/group_std_mean": 0.06266987174749375, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00481048165820539, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00481048165820539, "signal/accgated_coverage_10/centered_abs_mean": 0.048104815930128095, "signal/accgated_coverage_10/group_std_mean": 0.06266987174749375, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00481048165820539, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00481048165820539, "signal/accgated_coverage_15/centered_abs_mean": 0.04780370891094208, "signal/accgated_coverage_15/group_std_mean": 0.062286855280399324, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004780371021479368, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004780371021479368, "signal/accgated_coverage_20/centered_abs_mean": 0.034034205600619316, "signal/accgated_coverage_20/group_std_mean": 0.044576478004455564, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00340342060662806, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00340342060662806, "signal/accgated_coverage_25/centered_abs_mean": 0.019152706488966942, "signal/accgated_coverage_25/group_std_mean": 0.02505219243466854, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.001915270695462823, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.001915270695462823, "signal/accgated_coverage_5/centered_abs_mean": 0.048104815930128095, "signal/accgated_coverage_5/group_std_mean": 0.06266987174749375, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00481048165820539, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00481048165820539, "signal/accuracy_reward/centered_abs_mean": 0.091473388671875, "signal/accuracy_reward/group_std_mean": 0.12521128356456757, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0457366943359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0457366943359375, "signal/advantage_abs_mean": 0.06399512514472008, "signal/advantage_pre_scale_abs_mean": 0.06399512514472008, "signal/advantage_pre_scale_std": 0.09923620074987412, "signal/advantage_std": 0.09923620074987412, "signal/brier_reward/centered_abs_mean": 0.11026464402675629, "signal/brier_reward/group_std_mean": 0.1442580610513687, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011026464402675629, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011026464402675629, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012701518088579177, "signal/confidence_uniqueness_reward/group_std_mean": 0.017074212618172168, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012701518135145307, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012701518135145307, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024226987501606346, "signal/frontier_aurc_reward/group_std_mean": 0.0040211153216660024, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0283733940450476e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0283733940450476e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004950394947081804, "signal/frontier_ece_reward/group_std_mean": 0.00653142724186182, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004950395144987851, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004950395144987851, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26609220504760744, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34021600484848025, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026609221845865248, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026609221845865248, "step": 235 }, { "calibration/aurc": 0.28641378674932866, "calibration/batch_distribution_entropy": 0.9841637432428328, "calibration/buffer_distribution_entropy": 0.9968625010942876, "calibration/confidence_entropy": 0.5034196161851294, "calibration/coverage@0%": 0.05117799045988258, "calibration/coverage@1%": 0.08711549045988258, "calibration/coverage@10%": 0.21445924045988257, "calibration/coverage@15%": 0.2980743945694716, "calibration/coverage@20%": 0.38250214041095887, "calibration/coverage@25%": 0.4681101700097847, "calibration/coverage@30%": 0.5697697529354208, "calibration/coverage@5%": 0.16211549045988258, "calibration/ece": 0.15185750949544422, "calibration/mean_confidence": 0.4985759031283967, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 743.0, "completions/max_terminated_length": 743.0, "completions/mean_length": 212.01640625, "completions/mean_terminated_length": 212.0368225097656, "completions/min_length": 81.2, "completions/min_terminated_length": 103.2, "epoch": 0.768, "grad_norm": 0.0006264409748837352, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 811587887.0, "reward": 0.9234783053398132, "reward_std": 0.08054482340812683, "rewards/accgated_coverage_0": 0.027358325943350793, "rewards/accgated_coverage_1": 0.027358325943350793, "rewards/accgated_coverage_10": 0.027348564751446248, "rewards/accgated_coverage_15": 0.027348769642412663, "rewards/accgated_coverage_20": 0.02053585313260555, "rewards/accgated_coverage_25": 0.014650637470185756, "rewards/accgated_coverage_5": 0.027357107028365135, "rewards/accuracy_reward": 0.49853515625, "rewards/brier_reward": 0.8120542526245117, "rewards/confidence_uniqueness_reward": 0.9526609897613525, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0028295089956372975, "rewards/frontier_ece_reward": 0.0036727309226989744, "rewards/frontier_entropy_batch_reward": -0.19739624857902527, "signal/accgated_coverage_0/centered_abs_mean": 0.04027009829878807, "signal/accgated_coverage_0/group_std_mean": 0.05210058838129043, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004027010034769773, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004027010034769773, "signal/accgated_coverage_1/centered_abs_mean": 0.04027009829878807, "signal/accgated_coverage_1/group_std_mean": 0.05210058838129043, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004027010034769773, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004027010034769773, "signal/accgated_coverage_10/centered_abs_mean": 0.04025917798280716, "signal/accgated_coverage_10/group_std_mean": 0.05208579152822494, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004025917826220393, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004025917826220393, "signal/accgated_coverage_15/centered_abs_mean": 0.039824848622083665, "signal/accgated_coverage_15/group_std_mean": 0.051523523032665254, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003982485039159656, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003982485039159656, "signal/accgated_coverage_20/centered_abs_mean": 0.025421395525336267, "signal/accgated_coverage_20/group_std_mean": 0.03290306515991688, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0025421395897865296, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0025421395897865296, "signal/accgated_coverage_25/centered_abs_mean": 0.015859098732471467, "signal/accgated_coverage_25/group_std_mean": 0.020310256630182266, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015859099105000496, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015859099105000496, "signal/accgated_coverage_5/centered_abs_mean": 0.040269167721271516, "signal/accgated_coverage_5/group_std_mean": 0.05209931433200836, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004026917088776827, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004026917088776827, "signal/accuracy_reward/centered_abs_mean": 0.083489990234375, "signal/accuracy_reward/group_std_mean": 0.11638489514589309, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0417449951171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0417449951171875, "signal/advantage_abs_mean": 0.06172212138772011, "signal/advantage_pre_scale_abs_mean": 0.06172212138772011, "signal/advantage_pre_scale_std": 0.09572511464357376, "signal/advantage_std": 0.09572511464357376, "signal/brier_reward/centered_abs_mean": 0.10978365540504456, "signal/brier_reward/group_std_mean": 0.1418785959482193, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010978365503251552, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010978365503251552, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011818506009876727, "signal/confidence_uniqueness_reward/group_std_mean": 0.015079454332590104, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00118185062892735, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00118185062892735, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002496037329547107, "signal/frontier_aurc_reward/group_std_mean": 0.004109069146215916, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.120046822004952e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.120046822004952e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004792108759284019, "signal/frontier_ece_reward/group_std_mean": 0.006286134757101536, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004792108782567084, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004792108782567084, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2550091713666916, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3290919542312622, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025500917807221413, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025500917807221413, "step": 240 }, { "calibration/aurc": 0.32671411407310635, "calibration/batch_distribution_entropy": 0.9782006759163547, "calibration/buffer_distribution_entropy": 0.9969583395265618, "calibration/confidence_entropy": 0.4687766027810496, "calibration/coverage@0%": 0.02578125, "calibration/coverage@1%": 0.02578125, "calibration/coverage@10%": 0.170703125, "calibration/coverage@15%": 0.25546875, "calibration/coverage@20%": 0.308203125, "calibration/coverage@25%": 0.359375, "calibration/coverage@30%": 0.471875, "calibration/coverage@5%": 0.133984375, "calibration/ece": 0.14992271913099717, "calibration/mean_confidence": 0.5172538257230894, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 745.2, "completions/max_terminated_length": 745.2, "completions/mean_length": 211.4888671875, "completions/mean_terminated_length": 211.57101745605468, "completions/min_length": 20.4, "completions/min_terminated_length": 101.6, "epoch": 0.784, "grad_norm": 0.000795921718236059, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 828927901.0, "reward": 0.9365089654922485, "reward_std": 0.0868492677807808, "rewards/accgated_coverage_0": 0.02046150788664818, "rewards/accgated_coverage_1": 0.02046150788664818, "rewards/accgated_coverage_10": 0.020463902130723, "rewards/accgated_coverage_15": 0.01999166887253523, "rewards/accgated_coverage_20": 0.016766261495649814, "rewards/accgated_coverage_25": 0.01441083662211895, "rewards/accgated_coverage_5": 0.02046313285827637, "rewards/accuracy_reward": 0.53837890625, "rewards/brier_reward": 0.7908718585968018, "rewards/confidence_uniqueness_reward": 0.9525047183036804, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0030904591083526613, "rewards/frontier_ece_reward": 0.0025389259913936256, "rewards/frontier_entropy_batch_reward": -0.20339978933334352, "signal/accgated_coverage_0/centered_abs_mean": 0.051776818186044696, "signal/accgated_coverage_0/group_std_mean": 0.06660348773002625, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00517768207937479, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00517768207937479, "signal/accgated_coverage_1/centered_abs_mean": 0.051776818186044696, "signal/accgated_coverage_1/group_std_mean": 0.06660348773002625, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00517768207937479, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00517768207937479, "signal/accgated_coverage_10/centered_abs_mean": 0.05176556333899498, "signal/accgated_coverage_10/group_std_mean": 0.06658939719200134, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0051765562500804664, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0051765562500804664, "signal/accgated_coverage_15/centered_abs_mean": 0.050340484082698825, "signal/accgated_coverage_15/group_std_mean": 0.06477305367588997, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0050340484827756885, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0050340484827756885, "signal/accgated_coverage_20/centered_abs_mean": 0.03024843893945217, "signal/accgated_coverage_20/group_std_mean": 0.03920280672609806, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0030248438473790885, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0030248438473790885, "signal/accgated_coverage_25/centered_abs_mean": 0.018703461810946464, "signal/accgated_coverage_25/group_std_mean": 0.02392947468906641, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0018703461857512594, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0018703461857512594, "signal/accgated_coverage_5/centered_abs_mean": 0.05176918432116508, "signal/accgated_coverage_5/group_std_mean": 0.06659393087029457, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005176918627694249, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005176918627694249, "signal/accuracy_reward/centered_abs_mean": 0.105316162109375, "signal/accuracy_reward/group_std_mean": 0.13590774238109588, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0526580810546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0526580810546875, "signal/advantage_abs_mean": 0.0690420001745224, "signal/advantage_pre_scale_abs_mean": 0.0690420001745224, "signal/advantage_pre_scale_std": 0.10285361707210541, "signal/advantage_std": 0.10285361707210541, "signal/brier_reward/centered_abs_mean": 0.11876424252986909, "signal/brier_reward/group_std_mean": 0.15292527675628662, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011876424588263036, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011876424588263036, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012877122312784196, "signal/confidence_uniqueness_reward/group_std_mean": 0.01730199046432972, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001287712249904871, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001287712249904871, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027613468701019883, "signal/frontier_aurc_reward/group_std_mean": 0.004532812442630529, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.451683660387062e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.451683660387062e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004731657728552818, "signal/frontier_ece_reward/group_std_mean": 0.006212034169584513, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004731657856609672, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004731657856609672, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2753194272518158, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3491884648799896, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02753194384276867, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02753194384276867, "step": 245 }, { "calibration/aurc": 0.22368977492292014, "calibration/batch_distribution_entropy": 0.9765782788641488, "calibration/buffer_distribution_entropy": 0.9968784343575339, "calibration/confidence_entropy": 0.4779808267726235, "calibration/coverage@0%": 0.01875840875733855, "calibration/coverage@1%": 0.04922715875733855, "calibration/coverage@10%": 0.22823966487279845, "calibration/coverage@15%": 0.3611607142857143, "calibration/coverage@20%": 0.5086227984344422, "calibration/coverage@25%": 0.6490284063111545, "calibration/coverage@30%": 0.722919214774951, "calibration/coverage@5%": 0.14610215875733856, "calibration/ece": 0.11821748282284754, "calibration/mean_confidence": 0.4770415499416723, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 582.2, "completions/max_terminated_length": 582.2, "completions/mean_length": 212.08173828125, "completions/mean_terminated_length": 212.14385375976562, "completions/min_length": 42.4, "completions/min_terminated_length": 105.8, "epoch": 0.8, "grad_norm": 0.0008366380352526903, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 846110178.0, "reward": 0.9612038016319275, "reward_std": 0.07943681925535202, "rewards/accgated_coverage_0": 0.03096870370209217, "rewards/accgated_coverage_1": 0.03096870370209217, "rewards/accgated_coverage_10": 0.03096686936914921, "rewards/accgated_coverage_15": 0.029813123494386674, "rewards/accgated_coverage_20": 0.022094443440437317, "rewards/accgated_coverage_25": 0.01778464615345001, "rewards/accgated_coverage_5": 0.030968816578388215, "rewards/accuracy_reward": 0.57333984375, "rewards/brier_reward": 0.8080012440681458, "rewards/confidence_uniqueness_reward": 0.9516256332397461, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0028674704488366844, "rewards/frontier_ece_reward": 0.0028016922762617467, "rewards/frontier_entropy_batch_reward": -0.20883174240589142, "signal/accgated_coverage_0/centered_abs_mean": 0.05066419094800949, "signal/accgated_coverage_0/group_std_mean": 0.06535674557089806, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005066419020295143, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005066419020295143, "signal/accgated_coverage_1/centered_abs_mean": 0.05066419094800949, "signal/accgated_coverage_1/group_std_mean": 0.06535674557089806, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005066419020295143, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005066419020295143, "signal/accgated_coverage_10/centered_abs_mean": 0.050653228908777236, "signal/accgated_coverage_10/group_std_mean": 0.06534308791160584, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005065322946757078, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005065322946757078, "signal/accgated_coverage_15/centered_abs_mean": 0.04801043793559075, "signal/accgated_coverage_15/group_std_mean": 0.06195661723613739, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00480104386806488, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00480104386806488, "signal/accgated_coverage_20/centered_abs_mean": 0.02948525659739971, "signal/accgated_coverage_20/group_std_mean": 0.03847551196813583, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0029485255479812624, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0029485255479812624, "signal/accgated_coverage_25/centered_abs_mean": 0.01872854121029377, "signal/accgated_coverage_25/group_std_mean": 0.024161659926176072, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00187285419087857, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00187285419087857, "signal/accgated_coverage_5/centered_abs_mean": 0.05066225081682205, "signal/accgated_coverage_5/group_std_mean": 0.0653543896973133, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005066225025802851, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005066225025802851, "signal/accuracy_reward/centered_abs_mean": 0.092132568359375, "signal/accuracy_reward/group_std_mean": 0.11959045678377152, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0460662841796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0460662841796875, "signal/advantage_abs_mean": 0.0625168263912201, "signal/advantage_pre_scale_abs_mean": 0.0625168263912201, "signal/advantage_pre_scale_std": 0.09564936310052871, "signal/advantage_std": 0.09564936310052871, "signal/brier_reward/centered_abs_mean": 0.10817041993141174, "signal/brier_reward/group_std_mean": 0.13942115157842636, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010817042179405689, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010817042179405689, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013105977326631546, "signal/confidence_uniqueness_reward/group_std_mean": 0.01724378876388073, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001310597755946219, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001310597755946219, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025355865713208915, "signal/frontier_aurc_reward/group_std_mean": 0.004124428937211632, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.169483316014521e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.169483316014521e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00470409793779254, "signal/frontier_ece_reward/group_std_mean": 0.006183451414108277, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00047040980425663295, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00047040980425663295, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26716206073760984, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33822156190872193, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0267162062227726, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0267162062227726, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.46588043488860353, "eval_calibration/batch_distribution_entropy": 0.9194377245046288, "eval_calibration/buffer_distribution_entropy": 0.9969822940081324, "eval_calibration/confidence_entropy": 0.47392606359196077, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.0390625, "eval_calibration/coverage@20%": 0.1953125, "eval_calibration/coverage@25%": 0.25, "eval_calibration/coverage@30%": 0.328125, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.17888477450331003, "eval_calibration/mean_confidence": 0.4776080658371867, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 390.0, "eval_completions/max_terminated_length": 390.0, "eval_completions/mean_length": 217.4969024658203, "eval_completions/mean_terminated_length": 217.4969024658203, "eval_completions/min_length": 124.75, "eval_completions/min_terminated_length": 124.75, "eval_loss": 0.0, "eval_num_tokens": 846110178.0, "eval_reward": 0.8083221763372421, "eval_reward_std": 0.2187192626297474, "eval_rewards/accgated_coverage_0": 0.03624272719025612, "eval_rewards/accgated_coverage_1": 0.03624272719025612, "eval_rewards/accgated_coverage_10": 0.03623865591362119, "eval_rewards/accgated_coverage_15": 0.0346625130623579, "eval_rewards/accgated_coverage_20": 0.022707284428179264, "eval_rewards/accgated_coverage_25": 0.013365113409236073, "eval_rewards/accgated_coverage_5": 0.03624272719025612, "eval_rewards/accuracy_reward": 0.43359375, "eval_rewards/brier_reward": 0.8064423203468323, "eval_rewards/confidence_uniqueness_reward": 0.89013671875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0033500646241009235, "eval_rewards/frontier_ece_reward": 0.0033910262282006443, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 19.9553, "eval_samples_per_second": 25.056, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.07163327932357788, "eval_signal/accgated_coverage_0/group_std_mean": 0.08617032133042812, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007163328235037625, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007163328235037625, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.07163327932357788, "eval_signal/accgated_coverage_1/group_std_mean": 0.08617032133042812, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007163328235037625, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007163328235037625, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.07161696534603834, "eval_signal/accgated_coverage_10/group_std_mean": 0.08615143597126007, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007161696790717542, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007161696790717542, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.06835009530186653, "eval_signal/accgated_coverage_15/group_std_mean": 0.08244646713137627, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006835010135546327, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006835010135546327, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.039841676130890846, "eval_signal/accgated_coverage_20/group_std_mean": 0.04926642868667841, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.003984167706221342, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.003984167706221342, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.019560284446924925, "eval_signal/accgated_coverage_25/group_std_mean": 0.02460642997175455, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0019560285727493465, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0019560285727493465, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.07163327932357788, "eval_signal/accgated_coverage_5/group_std_mean": 0.08617032133042812, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007163328235037625, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007163328235037625, "eval_signal/accuracy_reward/centered_abs_mean": 0.478271484375, "eval_signal/accuracy_reward/group_std_mean": 0.49664156883955, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2391357421875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2391357421875, "eval_signal/advantage_abs_mean": 0.20329082757234573, "eval_signal/advantage_pre_scale_abs_mean": 0.20329082757234573, "eval_signal/advantage_pre_scale_std": 0.21654605492949486, "eval_signal/advantage_std": 0.21654605492949486, "eval_signal/brier_reward/centered_abs_mean": 0.1918768174946308, "eval_signal/brier_reward/group_std_mean": 0.24410802125930786, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01918768184259534, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01918768184259534, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0425872802734375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051558976992964745, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004258728236891329, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004258728236891329, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0042505175224505365, "eval_signal/frontier_aurc_reward/group_std_mean": 0.00803718576207757, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.313147175911581e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.313147175911581e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.00573124154470861, "eval_signal/frontier_ece_reward/group_std_mean": 0.007669370388612151, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005731241835746914, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005731241835746914, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.2, "step": 250 }, { "calibration/aurc": 0.25161858829445716, "calibration/batch_distribution_entropy": 0.9604955477620883, "calibration/buffer_distribution_entropy": 0.9968190693662825, "calibration/confidence_entropy": 0.4511546023403012, "calibration/coverage@0%": 0.02621238992172211, "calibration/coverage@1%": 0.02621238992172211, "calibration/coverage@10%": 0.11419551125244617, "calibration/coverage@15%": 0.19276388209393347, "calibration/coverage@20%": 0.2979123348825832, "calibration/coverage@25%": 0.68237448018591, "calibration/coverage@30%": 0.8054488747553815, "calibration/coverage@5%": 0.06257873654598825, "calibration/ece": 0.13867088896719254, "calibration/mean_confidence": 0.5401082680443932, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 639.8, "completions/max_terminated_length": 639.8, "completions/mean_length": 215.06240234375, "completions/mean_terminated_length": 215.10444030761718, "completions/min_length": 63.4, "completions/min_terminated_length": 105.2, "epoch": 0.816, "grad_norm": 0.0009888941422104836, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 863411585.0, "reward": 0.953099250793457, "reward_std": 0.08940376788377762, "rewards/accgated_coverage_0": 0.019826328055933116, "rewards/accgated_coverage_1": 0.019826328055933116, "rewards/accgated_coverage_10": 0.01982517270371318, "rewards/accgated_coverage_15": 0.01932367868721485, "rewards/accgated_coverage_20": 0.01630774438381195, "rewards/accgated_coverage_25": 0.014926259219646455, "rewards/accgated_coverage_5": 0.019826328055933116, "rewards/accuracy_reward": 0.57373046875, "rewards/brier_reward": 0.7885306715965271, "rewards/confidence_uniqueness_reward": 0.9522530317306519, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003217545850202441, "rewards/frontier_ece_reward": 0.0024582074489444493, "rewards/frontier_entropy_batch_reward": -0.20889662504196166, "signal/accgated_coverage_0/centered_abs_mean": 0.05354453325271606, "signal/accgated_coverage_0/group_std_mean": 0.06835601478815079, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005354453343898058, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005354453343898058, "signal/accgated_coverage_1/centered_abs_mean": 0.05354453325271606, "signal/accgated_coverage_1/group_std_mean": 0.06835601478815079, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005354453343898058, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005354453343898058, "signal/accgated_coverage_10/centered_abs_mean": 0.05353115946054458, "signal/accgated_coverage_10/group_std_mean": 0.06833992823958397, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005353116150945425, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005353116150945425, "signal/accgated_coverage_15/centered_abs_mean": 0.05107894092798233, "signal/accgated_coverage_15/group_std_mean": 0.06524311304092408, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.005107894167304039, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.005107894167304039, "signal/accgated_coverage_20/centered_abs_mean": 0.0296579971909523, "signal/accgated_coverage_20/group_std_mean": 0.03823278471827507, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002965799765661359, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002965799765661359, "signal/accgated_coverage_25/centered_abs_mean": 0.0198915496468544, "signal/accgated_coverage_25/group_std_mean": 0.025325778871774673, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0019891550531610847, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0019891550531610847, "signal/accgated_coverage_5/centered_abs_mean": 0.05354453325271606, "signal/accgated_coverage_5/group_std_mean": 0.06835601478815079, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005354453343898058, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005354453343898058, "signal/accuracy_reward/centered_abs_mean": 0.107867431640625, "signal/accuracy_reward/group_std_mean": 0.13981811404228212, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0539337158203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0539337158203125, "signal/advantage_abs_mean": 0.07016213089227677, "signal/advantage_pre_scale_abs_mean": 0.07016213089227677, "signal/advantage_pre_scale_std": 0.10725255310535431, "signal/advantage_std": 0.10725255310535431, "signal/brier_reward/centered_abs_mean": 0.12337229251861573, "signal/brier_reward/group_std_mean": 0.15659765005111695, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012337229400873184, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012337229400873184, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012703911028802395, "signal/confidence_uniqueness_reward/group_std_mean": 0.016831454075872897, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012703910935670138, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012703910935670138, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.00313384085893631, "signal/frontier_aurc_reward/group_std_mean": 0.005052349204197526, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9173011464299635e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9173011464299635e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004800934810191393, "signal/frontier_ece_reward/group_std_mean": 0.006293817330151797, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00048009351012296976, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00048009351012296976, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2696466028690338, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34208568930625916, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026964660733938217, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026964660733938217, "step": 255 }, { "calibration/aurc": 0.24765935565406477, "calibration/batch_distribution_entropy": 0.9672541614417518, "calibration/buffer_distribution_entropy": 0.9964371976653, "calibration/confidence_entropy": 0.4697584302475404, "calibration/coverage@0%": 0.08489413659299336, "calibration/coverage@1%": 0.09467887240512643, "calibration/coverage@10%": 0.28215328340240203, "calibration/coverage@15%": 0.33649990646943706, "calibration/coverage@20%": 0.41351886259640847, "calibration/coverage@25%": 0.5323392833160662, "calibration/coverage@30%": 0.6261550934586163, "calibration/coverage@5%": 0.21994440327500864, "calibration/ece": 0.09697334967073587, "calibration/mean_confidence": 0.48439683040980785, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 697.6, "completions/max_terminated_length": 697.6, "completions/mean_length": 221.90908203125, "completions/mean_terminated_length": 222.12615051269532, "completions/min_length": 43.4, "completions/min_terminated_length": 106.4, "epoch": 0.832, "grad_norm": 0.0006920217419974506, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 880692286.0, "reward": 0.9494648218154907, "reward_std": 0.08398343473672867, "rewards/accgated_coverage_0": 0.03729419596493244, "rewards/accgated_coverage_1": 0.03729419596493244, "rewards/accgated_coverage_10": 0.03728927373886108, "rewards/accgated_coverage_15": 0.03530678525567055, "rewards/accgated_coverage_20": 0.024978424608707427, "rewards/accgated_coverage_25": 0.020059302635490894, "rewards/accgated_coverage_5": 0.03729419596493244, "rewards/accuracy_reward": 0.5490234375, "rewards/brier_reward": 0.8231766700744629, "rewards/confidence_uniqueness_reward": 0.9490555763244629, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.002319850795902312, "rewards/frontier_ece_reward": 0.00325658256188035, "rewards/frontier_entropy_batch_reward": -0.2503013014793396, "signal/accgated_coverage_0/centered_abs_mean": 0.04462068974971771, "signal/accgated_coverage_0/group_std_mean": 0.05807742029428482, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004462068993598222, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004462068993598222, "signal/accgated_coverage_1/centered_abs_mean": 0.04462068974971771, "signal/accgated_coverage_1/group_std_mean": 0.05807742029428482, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004462068993598222, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004462068993598222, "signal/accgated_coverage_10/centered_abs_mean": 0.04461221098899841, "signal/accgated_coverage_10/group_std_mean": 0.058065980672836304, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004461221117526293, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004461221117526293, "signal/accgated_coverage_15/centered_abs_mean": 0.04177615866065025, "signal/accgated_coverage_15/group_std_mean": 0.054360844939947126, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004177615791559219, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004177615791559219, "signal/accgated_coverage_20/centered_abs_mean": 0.026147307828068733, "signal/accgated_coverage_20/group_std_mean": 0.033843887597322465, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0026147309225052597, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0026147309225052597, "signal/accgated_coverage_25/centered_abs_mean": 0.017483064904808998, "signal/accgated_coverage_25/group_std_mean": 0.02221609316766262, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0017483065836131572, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0017483065836131572, "signal/accgated_coverage_5/centered_abs_mean": 0.04462068974971771, "signal/accgated_coverage_5/group_std_mean": 0.05807742029428482, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004462068993598222, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004462068993598222, "signal/accuracy_reward/centered_abs_mean": 0.0932373046875, "signal/accuracy_reward/group_std_mean": 0.12756348997354508, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04661865234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04661865234375, "signal/advantage_abs_mean": 0.06364405304193496, "signal/advantage_pre_scale_abs_mean": 0.06364405304193496, "signal/advantage_pre_scale_std": 0.10019244253635406, "signal/advantage_std": 0.10019244253635406, "signal/brier_reward/centered_abs_mean": 0.10104852169752121, "signal/brier_reward/group_std_mean": 0.132098488509655, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010104852169752121, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010104852169752121, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01508812140673399, "signal/confidence_uniqueness_reward/group_std_mean": 0.02120809331536293, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001508812210522592, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001508812210522592, "signal/format_reward/centered_abs_mean": 0.00186767578125, "signal/format_reward/group_std_mean": 0.00485165468417108, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000933837890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018905135337263345, "signal/frontier_aurc_reward/group_std_mean": 0.0030176178086549045, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3631419753655792e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3631419753655792e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004522326309233904, "signal/frontier_ece_reward/group_std_mean": 0.005910783167928457, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004522326402366161, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004522326402366161, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29563444256782534, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36747732758522034, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02956344522535801, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02956344522535801, "step": 260 }, { "calibration/aurc": 0.3271325458178449, "calibration/batch_distribution_entropy": 0.9662770451559457, "calibration/buffer_distribution_entropy": 0.9963845668571439, "calibration/confidence_entropy": 0.4835711156903063, "calibration/coverage@0%": 0.008203125, "calibration/coverage@1%": 0.008203125, "calibration/coverage@10%": 0.185546875, "calibration/coverage@15%": 0.237109375, "calibration/coverage@20%": 0.294140625, "calibration/coverage@25%": 0.475, "calibration/coverage@30%": 0.523046875, "calibration/coverage@5%": 0.13203125, "calibration/ece": 0.14876966321369578, "calibration/mean_confidence": 0.5546630362367326, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 968.2, "completions/max_terminated_length": 968.2, "completions/mean_length": 222.33134765625, "completions/mean_terminated_length": 222.5053680419922, "completions/min_length": 22.0, "completions/min_terminated_length": 108.2, "epoch": 0.848, "grad_norm": 0.0007502317312173545, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 897983327.0, "reward": 0.9353123426437377, "reward_std": 0.08215008527040482, "rewards/accgated_coverage_0": 0.029652020335197447, "rewards/accgated_coverage_1": 0.029652020335197447, "rewards/accgated_coverage_10": 0.029647645354270936, "rewards/accgated_coverage_15": 0.027789150178432465, "rewards/accgated_coverage_20": 0.018689888529479504, "rewards/accgated_coverage_25": 0.01452134121209383, "rewards/accgated_coverage_5": 0.029652020335197447, "rewards/accuracy_reward": 0.52490234375, "rewards/brier_reward": 0.8078467130661011, "rewards/confidence_uniqueness_reward": 0.9516240000724793, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.003087950637564063, "rewards/frontier_ece_reward": 0.003099389187991619, "rewards/frontier_entropy_batch_reward": -0.20927042365074158, "signal/accgated_coverage_0/centered_abs_mean": 0.03783770278096199, "signal/accgated_coverage_0/group_std_mean": 0.050519751757383345, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.003783770464360714, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.003783770464360714, "signal/accgated_coverage_1/centered_abs_mean": 0.03783770278096199, "signal/accgated_coverage_1/group_std_mean": 0.050519751757383345, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.003783770464360714, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.003783770464360714, "signal/accgated_coverage_10/centered_abs_mean": 0.03783150315284729, "signal/accgated_coverage_10/group_std_mean": 0.0505112536251545, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.003783150389790535, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.003783150389790535, "signal/accgated_coverage_15/centered_abs_mean": 0.0356540959328413, "signal/accgated_coverage_15/group_std_mean": 0.04766347408294678, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003565409732982516, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003565409732982516, "signal/accgated_coverage_20/centered_abs_mean": 0.021960367262363435, "signal/accgated_coverage_20/group_std_mean": 0.029427655786275864, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002196036884561181, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002196036884561181, "signal/accgated_coverage_25/centered_abs_mean": 0.01458423975855112, "signal/accgated_coverage_25/group_std_mean": 0.0191590566188097, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0014584239572286606, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0014584239572286606, "signal/accgated_coverage_5/centered_abs_mean": 0.03783770278096199, "signal/accgated_coverage_5/group_std_mean": 0.050519751757383345, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.003783770464360714, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.003783770464360714, "signal/accuracy_reward/centered_abs_mean": 0.082537841796875, "signal/accuracy_reward/group_std_mean": 0.11387998014688491, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0412689208984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0412689208984375, "signal/advantage_abs_mean": 0.06302751824259759, "signal/advantage_pre_scale_abs_mean": 0.06302751824259759, "signal/advantage_pre_scale_std": 0.1000214621424675, "signal/advantage_std": 0.1000214621424675, "signal/brier_reward/centered_abs_mean": 0.10466258078813553, "signal/brier_reward/group_std_mean": 0.13769466578960418, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010466258227825164, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010466258227825164, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013133116811513901, "signal/confidence_uniqueness_reward/group_std_mean": 0.018831767141819, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013133117696270346, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013133117696270346, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417306780815, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002753878058865666, "signal/frontier_aurc_reward/group_std_mean": 0.004520110785961151, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.442347588133998e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.442347588133998e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004349597357213497, "signal/frontier_ece_reward/group_std_mean": 0.005748180858790875, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043495974387042224, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043495974387042224, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27231648564338684, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34300028085708617, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027231648564338684, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027231648564338684, "step": 265 }, { "calibration/aurc": 0.2760374266890062, "calibration/batch_distribution_entropy": 0.9531517789072197, "calibration/buffer_distribution_entropy": 0.9965140749004997, "calibration/confidence_entropy": 0.49118011044271037, "calibration/coverage@0%": 0.008213062622309197, "calibration/coverage@1%": 0.008213062622309197, "calibration/coverage@10%": 0.13416554549902152, "calibration/coverage@15%": 0.21314747431506847, "calibration/coverage@20%": 0.2542097296966732, "calibration/coverage@25%": 0.38098244863013697, "calibration/coverage@30%": 0.5280416768590999, "calibration/coverage@5%": 0.07275180406066536, "calibration/ece": 0.13082628746087527, "calibration/mean_confidence": 0.6128360015117618, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 956.8, "completions/max_terminated_length": 956.8, "completions/mean_length": 223.86318359375, "completions/mean_terminated_length": 223.99449462890624, "completions/min_length": 20.0, "completions/min_terminated_length": 110.0, "epoch": 0.864, "grad_norm": 0.0009761390392668545, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 915262502.0, "reward": 0.9546477913856506, "reward_std": 0.0886962041258812, "rewards/accgated_coverage_0": 0.023132944479584694, "rewards/accgated_coverage_1": 0.023132944479584694, "rewards/accgated_coverage_10": 0.023133278265595435, "rewards/accgated_coverage_15": 0.02238629199564457, "rewards/accgated_coverage_20": 0.017566022649407386, "rewards/accgated_coverage_25": 0.016244550049304963, "rewards/accgated_coverage_5": 0.023132944479584694, "rewards/accuracy_reward": 0.57900390625, "rewards/brier_reward": 0.7994905114173889, "rewards/confidence_uniqueness_reward": 0.9505669474601746, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0027686028741300108, "rewards/frontier_ece_reward": 0.002419534232467413, "rewards/frontier_entropy_batch_reward": -0.24598353505134582, "signal/accgated_coverage_0/centered_abs_mean": 0.05085097923874855, "signal/accgated_coverage_0/group_std_mean": 0.06552043557167053, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.005085097998380661, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.005085097998380661, "signal/accgated_coverage_1/centered_abs_mean": 0.05085097923874855, "signal/accgated_coverage_1/group_std_mean": 0.06552043557167053, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.005085097998380661, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.005085097998380661, "signal/accgated_coverage_10/centered_abs_mean": 0.050848403573036195, "signal/accgated_coverage_10/group_std_mean": 0.0655171237885952, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00508484048768878, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00508484048768878, "signal/accgated_coverage_15/centered_abs_mean": 0.04747554138302803, "signal/accgated_coverage_15/group_std_mean": 0.061189302057027814, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004747554380446673, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004747554380446673, "signal/accgated_coverage_20/centered_abs_mean": 0.028483838215470313, "signal/accgated_coverage_20/group_std_mean": 0.036812521517276764, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0028483838308602572, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0028483838308602572, "signal/accgated_coverage_25/centered_abs_mean": 0.01881438195705414, "signal/accgated_coverage_25/group_std_mean": 0.024093781784176826, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0018814382376149297, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0018814382376149297, "signal/accgated_coverage_5/centered_abs_mean": 0.05085097923874855, "signal/accgated_coverage_5/group_std_mean": 0.06552043557167053, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.005085097998380661, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.005085097998380661, "signal/accuracy_reward/centered_abs_mean": 0.102349853515625, "signal/accuracy_reward/group_std_mean": 0.13482767790555955, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0511749267578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0511749267578125, "signal/advantage_abs_mean": 0.0686701402068138, "signal/advantage_pre_scale_abs_mean": 0.0686701402068138, "signal/advantage_pre_scale_std": 0.1049189954996109, "signal/advantage_std": 0.1049189954996109, "signal/brier_reward/centered_abs_mean": 0.11167400926351548, "signal/brier_reward/group_std_mean": 0.14373133778572084, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011167401075363159, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011167401075363159, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013636622577905655, "signal/confidence_uniqueness_reward/group_std_mean": 0.019001100584864615, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00136366228107363, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00136366228107363, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_std_mean": 0.0035306816920638085, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025581192690879106, "signal/frontier_aurc_reward/group_std_mean": 0.004242032580077648, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.19764920277521e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.19764920277521e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004280299786478281, "signal/frontier_ece_reward/group_std_mean": 0.005746448040008545, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042802998214028774, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042802998214028774, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2921872317790985, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3625770092010498, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029218722507357596, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029218722507357596, "step": 270 }, { "calibration/aurc": 0.37473883274386777, "calibration/batch_distribution_entropy": 0.9743970080640647, "calibration/buffer_distribution_entropy": 0.996299853626935, "calibration/confidence_entropy": 0.46121785606815846, "calibration/coverage@0%": 0.017578125, "calibration/coverage@1%": 0.017578125, "calibration/coverage@10%": 0.046900226272015656, "calibration/coverage@15%": 0.07934885640900195, "calibration/coverage@20%": 0.1508920927103718, "calibration/coverage@25%": 0.2279247186888454, "calibration/coverage@30%": 0.3037579500978474, "calibration/coverage@5%": 0.019140625, "calibration/ece": 0.13767695593114995, "calibration/mean_confidence": 0.515725347280679, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 716.4, "completions/max_terminated_length": 716.4, "completions/mean_length": 220.306640625, "completions/mean_terminated_length": 220.3911346435547, "completions/min_length": 40.4, "completions/min_terminated_length": 102.0, "epoch": 0.88, "grad_norm": 0.0008852293249219656, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 932665514.0, "reward": 0.924151074886322, "reward_std": 0.08492428660392762, "rewards/accgated_coverage_0": 0.029650628566741943, "rewards/accgated_coverage_1": 0.029650628566741943, "rewards/accgated_coverage_10": 0.029650628566741943, "rewards/accgated_coverage_15": 0.02788240723311901, "rewards/accgated_coverage_20": 0.01842728815972805, "rewards/accgated_coverage_25": 0.014962680265307426, "rewards/accgated_coverage_5": 0.029650628566741943, "rewards/accuracy_reward": 0.5060546875, "rewards/brier_reward": 0.8013368129730225, "rewards/confidence_uniqueness_reward": 0.9511494755744934, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0028143803123384715, "rewards/frontier_ece_reward": 0.0030478714033961296, "rewards/frontier_entropy_batch_reward": -0.22137869000434876, "signal/accgated_coverage_0/centered_abs_mean": 0.04147433005273342, "signal/accgated_coverage_0/group_std_mean": 0.05344668477773666, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0041474332101643085, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0041474332101643085, "signal/accgated_coverage_1/centered_abs_mean": 0.04147433005273342, "signal/accgated_coverage_1/group_std_mean": 0.05344668477773666, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0041474332101643085, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0041474332101643085, "signal/accgated_coverage_10/centered_abs_mean": 0.04147433005273342, "signal/accgated_coverage_10/group_std_mean": 0.05344668477773666, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0041474332101643085, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0041474332101643085, "signal/accgated_coverage_15/centered_abs_mean": 0.03853954002261162, "signal/accgated_coverage_15/group_std_mean": 0.04971724823117256, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0038539541885256766, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0038539541885256766, "signal/accgated_coverage_20/centered_abs_mean": 0.02363501489162445, "signal/accgated_coverage_20/group_std_mean": 0.030636246129870415, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002363501605577767, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002363501605577767, "signal/accgated_coverage_25/centered_abs_mean": 0.015382234752178193, "signal/accgated_coverage_25/group_std_mean": 0.019744027778506278, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015382234705612063, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015382234705612063, "signal/accgated_coverage_5/centered_abs_mean": 0.04147433005273342, "signal/accgated_coverage_5/group_std_mean": 0.05344668477773666, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0041474332101643085, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0041474332101643085, "signal/accuracy_reward/centered_abs_mean": 0.0987060546875, "signal/accuracy_reward/group_std_mean": 0.1300501987338066, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04935302734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04935302734375, "signal/advantage_abs_mean": 0.06596999615430832, "signal/advantage_pre_scale_abs_mean": 0.06596999615430832, "signal/advantage_pre_scale_std": 0.1031409427523613, "signal/advantage_std": 0.1031409427523613, "signal/brier_reward/centered_abs_mean": 0.11196524053812026, "signal/brier_reward/group_std_mean": 0.14593787491321564, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01119652446359396, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01119652446359396, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013332638517022133, "signal/confidence_uniqueness_reward/group_std_mean": 0.01807792242616415, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013332638889551162, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013332638889551162, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023726322688162327, "signal/frontier_aurc_reward/group_std_mean": 0.003939795168116688, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.965790372400079e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.965790372400079e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004360213689506054, "signal/frontier_ece_reward/group_std_mean": 0.0056990132667124275, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004360213817562908, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004360213817562908, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27509679198265075, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3451230525970459, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027509679645299913, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027509679645299913, "step": 275 }, { "calibration/aurc": 0.35513378681848906, "calibration/batch_distribution_entropy": 0.9721715794391692, "calibration/buffer_distribution_entropy": 0.9958760977657816, "calibration/confidence_entropy": 0.47022197464919013, "calibration/coverage@0%": 0.00978167808219178, "calibration/coverage@1%": 0.00978167808219178, "calibration/coverage@10%": 0.04657228473581213, "calibration/coverage@15%": 0.1555826504403131, "calibration/coverage@20%": 0.2169512903620352, "calibration/coverage@25%": 0.3224467954990215, "calibration/coverage@30%": 0.47991071428571425, "calibration/coverage@5%": 0.010173067514677104, "calibration/ece": 0.144891333338042, "calibration/mean_confidence": 0.5285554545851768, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 661.8, "completions/max_terminated_length": 661.8, "completions/mean_length": 217.53076171875, "completions/mean_terminated_length": 217.6579162597656, "completions/min_length": 20.0, "completions/min_terminated_length": 103.2, "epoch": 0.896, "grad_norm": 0.0009623025543987751, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 950003877.0, "reward": 0.9346998453140258, "reward_std": 0.0807345226407051, "rewards/accgated_coverage_0": 0.02446789890527725, "rewards/accgated_coverage_1": 0.02446789890527725, "rewards/accgated_coverage_10": 0.02446789890527725, "rewards/accgated_coverage_15": 0.023514636792242527, "rewards/accgated_coverage_20": 0.016649814136326314, "rewards/accgated_coverage_25": 0.01559778805822134, "rewards/accgated_coverage_5": 0.02446789890527725, "rewards/accuracy_reward": 0.53408203125, "rewards/brier_reward": 0.8033548951148987, "rewards/confidence_uniqueness_reward": 0.9509935498237609, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0031520756892859936, "rewards/frontier_ece_reward": 0.0023849430959671735, "rewards/frontier_entropy_batch_reward": -0.2299666076898575, "signal/accgated_coverage_0/centered_abs_mean": 0.04366839006543159, "signal/accgated_coverage_0/group_std_mean": 0.056435997039079665, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004366839025169611, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004366839025169611, "signal/accgated_coverage_1/centered_abs_mean": 0.04366839006543159, "signal/accgated_coverage_1/group_std_mean": 0.056435997039079665, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004366839025169611, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004366839025169611, "signal/accgated_coverage_10/centered_abs_mean": 0.04366839006543159, "signal/accgated_coverage_10/group_std_mean": 0.056435997039079665, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004366839025169611, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004366839025169611, "signal/accgated_coverage_15/centered_abs_mean": 0.039437131583690645, "signal/accgated_coverage_15/group_std_mean": 0.05089409127831459, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00394371310248971, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00394371310248971, "signal/accgated_coverage_20/centered_abs_mean": 0.024134864658117296, "signal/accgated_coverage_20/group_std_mean": 0.03109540343284607, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0024134865030646323, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0024134865030646323, "signal/accgated_coverage_25/centered_abs_mean": 0.016570880077779293, "signal/accgated_coverage_25/group_std_mean": 0.020994833111763, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016570880776271223, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016570880776271223, "signal/accgated_coverage_5/centered_abs_mean": 0.04366839006543159, "signal/accgated_coverage_5/group_std_mean": 0.056435997039079665, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004366839025169611, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004366839025169611, "signal/accuracy_reward/centered_abs_mean": 0.083636474609375, "signal/accuracy_reward/group_std_mean": 0.11912869811058044, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0418182373046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0418182373046875, "signal/advantage_abs_mean": 0.06075609400868416, "signal/advantage_pre_scale_abs_mean": 0.06075609400868416, "signal/advantage_pre_scale_std": 0.09686070084571838, "signal/advantage_std": 0.09686070084571838, "signal/brier_reward/centered_abs_mean": 0.10437444597482681, "signal/brier_reward/group_std_mean": 0.13576384782791137, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010437444783747196, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010437444783747196, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01360289491713047, "signal/confidence_uniqueness_reward/group_std_mean": 0.01908070743083954, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013602895196527243, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013602895196527243, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026476346887648106, "signal/frontier_aurc_reward/group_std_mean": 0.004367242194712162, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3095434264396315e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3095434264396315e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004144516214728356, "signal/frontier_ece_reward/group_std_mean": 0.005457306373864412, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004144516307860613, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004144516307860613, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2829400360584259, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3546809792518616, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02829400487244129, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02829400487244129, "step": 280 }, { "calibration/aurc": 0.38707537310481943, "calibration/batch_distribution_entropy": 0.9719699729956378, "calibration/buffer_distribution_entropy": 0.995588973701375, "calibration/confidence_entropy": 0.4871019202027833, "calibration/coverage@0%": 0.022777498904263376, "calibration/coverage@1%": 0.022777498904263376, "calibration/coverage@10%": 0.0830656638193534, "calibration/coverage@15%": 0.17503398795987296, "calibration/coverage@20%": 0.27094699097747393, "calibration/coverage@25%": 0.3676189943540729, "calibration/coverage@30%": 0.42396939632841724, "calibration/coverage@5%": 0.027885553914086557, "calibration/ece": 0.14931698159648296, "calibration/mean_confidence": 0.5074117001231717, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 839.2, "completions/max_terminated_length": 839.2, "completions/mean_length": 219.380078125, "completions/mean_terminated_length": 219.6826599121094, "completions/min_length": 37.4, "completions/min_terminated_length": 102.6, "epoch": 0.912, "grad_norm": 0.0010746036423370242, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 967301625.0, "reward": 0.9356503963470459, "reward_std": 0.08552553951740265, "rewards/accgated_coverage_0": 0.023921893909573555, "rewards/accgated_coverage_1": 0.023921893909573555, "rewards/accgated_coverage_10": 0.023920951783657073, "rewards/accgated_coverage_15": 0.02283487160457298, "rewards/accgated_coverage_20": 0.016963693872094156, "rewards/accgated_coverage_25": 0.01679763663560152, "rewards/accgated_coverage_5": 0.023921893909573555, "rewards/accuracy_reward": 0.5359375, "rewards/brier_reward": 0.8047610640525817, "rewards/confidence_uniqueness_reward": 0.9503351330757142, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.0026335842441767452, "rewards/frontier_ece_reward": 0.00246874107979238, "rewards/frontier_entropy_batch_reward": -0.22586590945720672, "signal/accgated_coverage_0/centered_abs_mean": 0.04468504786491394, "signal/accgated_coverage_0/group_std_mean": 0.05757425650954247, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004468504665419459, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004468504665419459, "signal/accgated_coverage_1/centered_abs_mean": 0.04468504786491394, "signal/accgated_coverage_1/group_std_mean": 0.05757425650954247, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004468504665419459, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004468504665419459, "signal/accgated_coverage_10/centered_abs_mean": 0.044683948159217834, "signal/accgated_coverage_10/group_std_mean": 0.057572783529758455, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004468394769355654, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004468394769355654, "signal/accgated_coverage_15/centered_abs_mean": 0.0402244932949543, "signal/accgated_coverage_15/group_std_mean": 0.0519582524895668, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004022449580952525, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004022449580952525, "signal/accgated_coverage_20/centered_abs_mean": 0.02362305298447609, "signal/accgated_coverage_20/group_std_mean": 0.03058423213660717, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0023623052751645447, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0023623052751645447, "signal/accgated_coverage_25/centered_abs_mean": 0.016879346594214438, "signal/accgated_coverage_25/group_std_mean": 0.021654066629707813, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.001687934761866927, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.001687934761866927, "signal/accgated_coverage_5/centered_abs_mean": 0.04468504786491394, "signal/accgated_coverage_5/group_std_mean": 0.05757425650954247, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004468504665419459, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004468504665419459, "signal/accuracy_reward/centered_abs_mean": 0.08746337890625, "signal/accuracy_reward/group_std_mean": 0.12233798801898957, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043731689453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.043731689453125, "signal/advantage_abs_mean": 0.06453572064638138, "signal/advantage_pre_scale_abs_mean": 0.06453572064638138, "signal/advantage_pre_scale_std": 0.10137254744768143, "signal/advantage_std": 0.10137254744768143, "signal/brier_reward/centered_abs_mean": 0.11355163305997848, "signal/brier_reward/group_std_mean": 0.14732645452022552, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011355163529515266, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011355163529515266, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014806121587753296, "signal/confidence_uniqueness_reward/group_std_mean": 0.02111569344997406, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014806122286245226, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014806122286245226, "signal/format_reward/centered_abs_mean": 0.0025634765625, "signal/format_reward/group_std_mean": 0.005934012494981289, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00128173828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00128173828125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002372403466142714, "signal/frontier_aurc_reward/group_std_mean": 0.004163792729377747, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9655042089871132e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9655042089871132e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004044037964195013, "signal/frontier_ece_reward/group_std_mean": 0.005374080128967762, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000404403789434582, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000404403789434582, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28103450536727903, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35467966794967654, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028103450685739516, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028103450685739516, "step": 285 }, { "calibration/aurc": 0.3867435653616995, "calibration/batch_distribution_entropy": 0.9813316739371245, "calibration/buffer_distribution_entropy": 0.9959011772524413, "calibration/confidence_entropy": 0.49522539173686414, "calibration/coverage@0%": 0.007423403864970646, "calibration/coverage@1%": 0.007423403864970646, "calibration/coverage@10%": 0.019532778864970644, "calibration/coverage@15%": 0.03164826932485323, "calibration/coverage@20%": 0.1396342954990215, "calibration/coverage@25%": 0.23307393590998043, "calibration/coverage@30%": 0.30033252813111544, "calibration/coverage@5%": 0.007423403864970646, "calibration/ece": 0.1281636041486433, "calibration/mean_confidence": 0.4964639264819253, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 716.8, "completions/max_terminated_length": 716.8, "completions/mean_length": 214.851953125, "completions/mean_terminated_length": 214.97773132324218, "completions/min_length": 22.0, "completions/min_terminated_length": 97.8, "epoch": 0.928, "grad_norm": 0.0007004987564869225, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 984528525.0, "reward": 0.9267191290855408, "reward_std": 0.08182481080293655, "rewards/accgated_coverage_0": 0.027588574960827828, "rewards/accgated_coverage_1": 0.027588574960827828, "rewards/accgated_coverage_10": 0.027588574960827828, "rewards/accgated_coverage_15": 0.025525929778814314, "rewards/accgated_coverage_20": 0.017605995759367943, "rewards/accgated_coverage_25": 0.015273153223097325, "rewards/accgated_coverage_5": 0.027588574960827828, "rewards/accuracy_reward": 0.51845703125, "rewards/brier_reward": 0.7924355387687683, "rewards/confidence_uniqueness_reward": 0.9507668256759644, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0030933755449950697, "rewards/frontier_ece_reward": 0.002328445459716022, "rewards/frontier_entropy_batch_reward": -0.23606752157211303, "signal/accgated_coverage_0/centered_abs_mean": 0.04182121828198433, "signal/accgated_coverage_0/group_std_mean": 0.05356579944491387, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004182121716439724, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004182121716439724, "signal/accgated_coverage_1/centered_abs_mean": 0.04182121828198433, "signal/accgated_coverage_1/group_std_mean": 0.05356579944491387, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004182121716439724, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004182121716439724, "signal/accgated_coverage_10/centered_abs_mean": 0.04182121828198433, "signal/accgated_coverage_10/group_std_mean": 0.05356579944491387, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004182121716439724, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004182121716439724, "signal/accgated_coverage_15/centered_abs_mean": 0.03778692409396171, "signal/accgated_coverage_15/group_std_mean": 0.048517832159996034, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0037786925211548807, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0037786925211548807, "signal/accgated_coverage_20/centered_abs_mean": 0.022749120369553566, "signal/accgated_coverage_20/group_std_mean": 0.029339329153299332, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0022749120369553568, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0022749120369553568, "signal/accgated_coverage_25/centered_abs_mean": 0.016079240664839744, "signal/accgated_coverage_25/group_std_mean": 0.02050723284482956, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016079240944236518, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016079240944236518, "signal/accgated_coverage_5/centered_abs_mean": 0.04182121828198433, "signal/accgated_coverage_5/group_std_mean": 0.05356579944491387, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004182121716439724, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004182121716439724, "signal/accuracy_reward/centered_abs_mean": 0.086602783203125, "signal/accuracy_reward/group_std_mean": 0.11836623698472977, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0433013916015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0433013916015625, "signal/advantage_abs_mean": 0.06185290068387985, "signal/advantage_pre_scale_abs_mean": 0.06185290068387985, "signal/advantage_pre_scale_std": 0.09768745750188827, "signal/advantage_std": 0.09768745750188827, "signal/brier_reward/centered_abs_mean": 0.11498722583055496, "signal/brier_reward/group_std_mean": 0.14888761341571807, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011498722806572914, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011498722806572914, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014011159539222717, "signal/confidence_uniqueness_reward/group_std_mean": 0.01889067105948925, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014011159539222718, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014011159539222718, "signal/format_reward/centered_abs_mean": 0.001123046875, "signal/format_reward/group_std_mean": 0.0029782545287162067, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005615234375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027191273402422667, "signal/frontier_aurc_reward/group_std_mean": 0.004544518515467643, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3989092116826214e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3989092116826214e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0042263313196599485, "signal/frontier_ece_reward/group_std_mean": 0.005566684249788523, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004226331366226077, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004226331366226077, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28662583231925964, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3584433555603027, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028662583604454995, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028662583604454995, "step": 290 }, { "calibration/aurc": 0.26530272496950286, "calibration/batch_distribution_entropy": 0.9849073126919665, "calibration/buffer_distribution_entropy": 0.9962016302239295, "calibration/confidence_entropy": 0.5012886823644893, "calibration/coverage@0%": 0.022666196615632555, "calibration/coverage@1%": 0.022666196615632555, "calibration/coverage@10%": 0.1883402162234757, "calibration/coverage@15%": 0.2782153469264418, "calibration/coverage@20%": 0.3884802002992978, "calibration/coverage@25%": 0.4768226708491616, "calibration/coverage@30%": 0.5859547402008749, "calibration/coverage@5%": 0.07540057161563256, "calibration/ece": 0.08265569427508188, "calibration/mean_confidence": 0.48410358096480427, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 660.8, "completions/max_terminated_length": 660.8, "completions/mean_length": 213.1173828125, "completions/mean_terminated_length": 213.2224548339844, "completions/min_length": 64.0, "completions/min_terminated_length": 106.2, "epoch": 0.944, "grad_norm": 0.0007865950465202332, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 1001686271.0, "reward": 0.9313256740570068, "reward_std": 0.08973944038152695, "rewards/accgated_coverage_0": 0.025818699225783347, "rewards/accgated_coverage_1": 0.025818699225783347, "rewards/accgated_coverage_10": 0.025818699225783347, "rewards/accgated_coverage_15": 0.024165811762213708, "rewards/accgated_coverage_20": 0.01818331703543663, "rewards/accgated_coverage_25": 0.014625198766589164, "rewards/accgated_coverage_5": 0.025818699225783347, "rewards/accuracy_reward": 0.5248046875, "rewards/brier_reward": 0.798682701587677, "rewards/confidence_uniqueness_reward": 0.9513814806938171, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0027979562990367414, "rewards/frontier_ece_reward": 0.0022492259275168182, "rewards/frontier_entropy_batch_reward": -0.22004973590373994, "signal/accgated_coverage_0/centered_abs_mean": 0.044853395968675616, "signal/accgated_coverage_0/group_std_mean": 0.05786952823400497, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00448533957824111, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00448533957824111, "signal/accgated_coverage_1/centered_abs_mean": 0.044853395968675616, "signal/accgated_coverage_1/group_std_mean": 0.05786952823400497, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00448533957824111, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00448533957824111, "signal/accgated_coverage_10/centered_abs_mean": 0.044853395968675616, "signal/accgated_coverage_10/group_std_mean": 0.05786952823400497, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00448533957824111, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00448533957824111, "signal/accgated_coverage_15/centered_abs_mean": 0.04129325300455093, "signal/accgated_coverage_15/group_std_mean": 0.05330209955573082, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00412932513281703, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00412932513281703, "signal/accgated_coverage_20/centered_abs_mean": 0.024261708557605743, "signal/accgated_coverage_20/group_std_mean": 0.031504085287451744, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0024261708138510587, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0024261708138510587, "signal/accgated_coverage_25/centered_abs_mean": 0.016015125811100005, "signal/accgated_coverage_25/group_std_mean": 0.020626705139875412, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016015126369893552, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016015126369893552, "signal/accgated_coverage_5/centered_abs_mean": 0.044853395968675616, "signal/accgated_coverage_5/group_std_mean": 0.05786952823400497, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00448533957824111, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00448533957824111, "signal/accuracy_reward/centered_abs_mean": 0.1136474609375, "signal/accuracy_reward/group_std_mean": 0.148611381649971, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05682373046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05682373046875, "signal/advantage_abs_mean": 0.06972624510526657, "signal/advantage_pre_scale_abs_mean": 0.06972624510526657, "signal/advantage_pre_scale_std": 0.10730479061603546, "signal/advantage_std": 0.10730479061603546, "signal/brier_reward/centered_abs_mean": 0.11074768900871276, "signal/brier_reward/group_std_mean": 0.14232046902179718, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011074769496917724, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011074769496917724, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013354774564504623, "signal/confidence_uniqueness_reward/group_std_mean": 0.018452975898981094, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013354774564504623, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013354774564504623, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021938590798527002, "signal/frontier_aurc_reward/group_std_mean": 0.003584741707891226, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7423238498158754e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7423238498158754e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004127887263894081, "signal/frontier_ece_reward/group_std_mean": 0.005519901774823666, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00041278875432908534, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00041278875432908534, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.279949414730072, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3534364700317383, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02799494154751301, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02799494154751301, "step": 295 }, { "calibration/aurc": 0.34402592275438887, "calibration/batch_distribution_entropy": 0.9760305696696996, "calibration/buffer_distribution_entropy": 0.9963368302840016, "calibration/confidence_entropy": 0.4657364091598736, "calibration/coverage@0%": 0.007421875, "calibration/coverage@1%": 0.007421875, "calibration/coverage@10%": 0.065625, "calibration/coverage@15%": 0.211328125, "calibration/coverage@20%": 0.265234375, "calibration/coverage@25%": 0.3265625, "calibration/coverage@30%": 0.37265625, "calibration/coverage@5%": 0.011328125, "calibration/ece": 0.14689679746783818, "calibration/mean_confidence": 0.5482983593374213, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 907.2, "completions/max_terminated_length": 907.2, "completions/mean_length": 208.171484375, "completions/mean_terminated_length": 208.1913269042969, "completions/min_length": 82.8, "completions/min_terminated_length": 103.8, "epoch": 0.96, "grad_norm": 0.0008087375317700207, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 1018758267.0, "reward": 0.9315295815467834, "reward_std": 0.07846418023109436, "rewards/accgated_coverage_0": 0.02990303039550781, "rewards/accgated_coverage_1": 0.02990303039550781, "rewards/accgated_coverage_10": 0.029898150265216826, "rewards/accgated_coverage_15": 0.028139904513955115, "rewards/accgated_coverage_20": 0.019901422411203386, "rewards/accgated_coverage_25": 0.01664300709962845, "rewards/accgated_coverage_5": 0.02990303039550781, "rewards/accuracy_reward": 0.51904296875, "rewards/brier_reward": 0.8051493167877197, "rewards/confidence_uniqueness_reward": 0.9509793639183044, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.003521607397124171, "rewards/frontier_ece_reward": 0.002893084893003106, "rewards/frontier_entropy_batch_reward": -0.22230381965637208, "signal/accgated_coverage_0/centered_abs_mean": 0.03826850652694702, "signal/accgated_coverage_0/group_std_mean": 0.05015629380941391, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0038268506061285732, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0038268506061285732, "signal/accgated_coverage_1/centered_abs_mean": 0.03826850652694702, "signal/accgated_coverage_1/group_std_mean": 0.05015629380941391, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0038268506061285732, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0038268506061285732, "signal/accgated_coverage_10/centered_abs_mean": 0.03825867623090744, "signal/accgated_coverage_10/group_std_mean": 0.05014391764998436, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0038258675020188095, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0038258675020188095, "signal/accgated_coverage_15/centered_abs_mean": 0.03575834967195988, "signal/accgated_coverage_15/group_std_mean": 0.0468507744371891, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0035758350044488908, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0035758350044488908, "signal/accgated_coverage_20/centered_abs_mean": 0.021050278469920157, "signal/accgated_coverage_20/group_std_mean": 0.02743927575647831, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0021050279028713703, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0021050279028713703, "signal/accgated_coverage_25/centered_abs_mean": 0.015643270313739778, "signal/accgated_coverage_25/group_std_mean": 0.01996114067733288, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015643270453438163, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015643270453438163, "signal/accgated_coverage_5/centered_abs_mean": 0.03826850652694702, "signal/accgated_coverage_5/group_std_mean": 0.05015629380941391, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0038268506061285732, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0038268506061285732, "signal/accuracy_reward/centered_abs_mean": 0.085845947265625, "signal/accuracy_reward/group_std_mean": 0.11925376802682877, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0429229736328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0429229736328125, "signal/advantage_abs_mean": 0.06013981848955154, "signal/advantage_pre_scale_abs_mean": 0.06013981848955154, "signal/advantage_pre_scale_std": 0.09505997449159623, "signal/advantage_std": 0.09505997449159623, "signal/brier_reward/centered_abs_mean": 0.10594068318605424, "signal/brier_reward/group_std_mean": 0.1382671058177948, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01059406865388155, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01059406865388155, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012687204778194428, "signal/confidence_uniqueness_reward/group_std_mean": 0.016237646527588367, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012687204871326685, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012687204871326685, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029484509490430356, "signal/frontier_aurc_reward/group_std_mean": 0.004800262581557036, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.685563715407625e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.685563715407625e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.004553208034485579, "signal/frontier_ece_reward/group_std_mean": 0.005956902913749218, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00045532081858254967, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00045532081858254967, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26913765668869016, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3415271699428558, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02691376656293869, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02691376656293869, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.44108417363651087, "eval_calibration/batch_distribution_entropy": 0.947659286598127, "eval_calibration/buffer_distribution_entropy": 0.9959698349464184, "eval_calibration/confidence_entropy": 0.4709155969582669, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.125, "eval_calibration/coverage@15%": 0.1328125, "eval_calibration/coverage@20%": 0.2109375, "eval_calibration/coverage@25%": 0.2265625, "eval_calibration/coverage@30%": 0.28125, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.2087351441017339, "eval_calibration/mean_confidence": 0.5224578841219565, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 461.0, "eval_completions/max_terminated_length": 461.0, "eval_completions/mean_length": 207.15261459350586, "eval_completions/mean_terminated_length": 207.15261459350586, "eval_completions/min_length": 110.5, "eval_completions/min_terminated_length": 110.5, "eval_loss": 0.0, "eval_num_tokens": 1018758267.0, "eval_reward": 0.8045169711112976, "eval_reward_std": 0.22579142451286316, "eval_rewards/accgated_coverage_0": 0.033579444978386164, "eval_rewards/accgated_coverage_1": 0.033579444978386164, "eval_rewards/accgated_coverage_10": 0.03355382476001978, "eval_rewards/accgated_coverage_15": 0.03188042528927326, "eval_rewards/accgated_coverage_20": 0.01990594994276762, "eval_rewards/accgated_coverage_25": 0.011771299876272678, "eval_rewards/accgated_coverage_5": 0.033579444978386164, "eval_rewards/accuracy_reward": 0.4296875, "eval_rewards/brier_reward": 0.7986667156219482, "eval_rewards/confidence_uniqueness_reward": 0.89794921875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004401608370244503, "eval_rewards/frontier_ece_reward": 0.0028167355339974165, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 22.9836, "eval_samples_per_second": 21.755, "eval_signal/accgated_coverage_0/centered_abs_mean": 0.06548797804862261, "eval_signal/accgated_coverage_0/group_std_mean": 0.07980928383767605, "eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006548797828145325, "eval_signal/accgated_coverage_0/weight": 0.10000000149011612, "eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006548797828145325, "eval_signal/accgated_coverage_1/centered_abs_mean": 0.06548797804862261, "eval_signal/accgated_coverage_1/group_std_mean": 0.07980928383767605, "eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006548797828145325, "eval_signal/accgated_coverage_1/weight": 0.10000000149011612, "eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006548797828145325, "eval_signal/accgated_coverage_10/centered_abs_mean": 0.06543731037527323, "eval_signal/accgated_coverage_10/group_std_mean": 0.07975173369050026, "eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0065437310840934515, "eval_signal/accgated_coverage_10/weight": 0.10000000149011612, "eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0065437310840934515, "eval_signal/accgated_coverage_15/centered_abs_mean": 0.062019459903240204, "eval_signal/accgated_coverage_15/group_std_mean": 0.07585378549993038, "eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006201946176588535, "eval_signal/accgated_coverage_15/weight": 0.10000000149011612, "eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006201946176588535, "eval_signal/accgated_coverage_20/centered_abs_mean": 0.03423011302947998, "eval_signal/accgated_coverage_20/group_std_mean": 0.042966075241565704, "eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0034230112796649337, "eval_signal/accgated_coverage_20/weight": 0.10000000149011612, "eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0034230112796649337, "eval_signal/accgated_coverage_25/centered_abs_mean": 0.01742625329643488, "eval_signal/accgated_coverage_25/group_std_mean": 0.022265508770942688, "eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0017426253180019557, "eval_signal/accgated_coverage_25/weight": 0.10000000149011612, "eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0017426253180019557, "eval_signal/accgated_coverage_5/centered_abs_mean": 0.06548797804862261, "eval_signal/accgated_coverage_5/group_std_mean": 0.07980928383767605, "eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0, "eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006548797828145325, "eval_signal/accgated_coverage_5/weight": 0.10000000149011612, "eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006548797828145325, "eval_signal/accuracy_reward/centered_abs_mean": 0.473388671875, "eval_signal/accuracy_reward/group_std_mean": 0.49401039630174637, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2366943359375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2366943359375, "eval_signal/advantage_abs_mean": 0.20914247632026672, "eval_signal/advantage_pre_scale_abs_mean": 0.20914247632026672, "eval_signal/advantage_pre_scale_std": 0.22344782203435898, "eval_signal/advantage_std": 0.22344782203435898, "eval_signal/brier_reward/centered_abs_mean": 0.19477688893675804, "eval_signal/brier_reward/group_std_mean": 0.2502768486738205, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019477689173072577, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019477689173072577, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0395965576171875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04632946569472551, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003959655878134072, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003959655878134072, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0058913074899464846, "eval_signal/frontier_aurc_reward/group_std_mean": 0.011559756007045507, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.364134944509715e-05, "eval_signal/frontier_aurc_reward/weight": 0.012500000186264515, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.364134944509715e-05, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0058010019129142165, "eval_signal/frontier_ece_reward/group_std_mean": 0.008047543233260512, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005801001680083573, "eval_signal/frontier_ece_reward/weight": 0.10000000149011612, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005801001680083573, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.174, "step": 300 }, { "calibration/aurc": 0.2518197302031364, "calibration/batch_distribution_entropy": 0.9684625187956735, "calibration/buffer_distribution_entropy": 0.9958406031055667, "calibration/confidence_entropy": 0.49119229362433725, "calibration/coverage@0%": 0.048463541666666665, "calibration/coverage@1%": 0.048463541666666665, "calibration/coverage@10%": 0.28565410539215685, "calibration/coverage@15%": 0.4068658088235294, "calibration/coverage@20%": 0.4940502450980392, "calibration/coverage@25%": 0.5573927696078431, "calibration/coverage@30%": 0.6101761642156862, "calibration/coverage@5%": 0.11487438725490196, "calibration/ece": 0.11883158877655049, "calibration/mean_confidence": 0.5323535677827451, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 869.6, "completions/max_terminated_length": 869.6, "completions/mean_length": 205.5177734375, "completions/mean_terminated_length": 205.68072204589845, "completions/min_length": 18.6, "completions/min_terminated_length": 97.8, "epoch": 0.976, "grad_norm": 0.0008637637365609407, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 1035723889.0, "reward": 0.9390368580818176, "reward_std": 0.08419644683599473, "rewards/accgated_coverage_0": 0.025884364638477562, "rewards/accgated_coverage_1": 0.025884364638477562, "rewards/accgated_coverage_10": 0.025869949627667664, "rewards/accgated_coverage_15": 0.024993031146004797, "rewards/accgated_coverage_20": 0.018615927174687387, "rewards/accgated_coverage_25": 0.015474013239145278, "rewards/accgated_coverage_5": 0.025884364638477562, "rewards/accuracy_reward": 0.54248046875, "rewards/brier_reward": 0.806470787525177, "rewards/confidence_uniqueness_reward": 0.950226652622223, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0030988804530352352, "rewards/frontier_ece_reward": 0.002570468030171469, "rewards/frontier_entropy_batch_reward": -0.23961410224437713, "signal/accgated_coverage_0/centered_abs_mean": 0.04443902298808098, "signal/accgated_coverage_0/group_std_mean": 0.056985524296760556, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004443902382627129, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004443902382627129, "signal/accgated_coverage_1/centered_abs_mean": 0.04443902298808098, "signal/accgated_coverage_1/group_std_mean": 0.056985524296760556, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004443902382627129, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004443902382627129, "signal/accgated_coverage_10/centered_abs_mean": 0.044393166154623034, "signal/accgated_coverage_10/group_std_mean": 0.05692854225635528, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004439316829666496, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004439316829666496, "signal/accgated_coverage_15/centered_abs_mean": 0.041806505620479585, "signal/accgated_coverage_15/group_std_mean": 0.05369865670800209, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004180650692433119, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004180650692433119, "signal/accgated_coverage_20/centered_abs_mean": 0.023348334059119225, "signal/accgated_coverage_20/group_std_mean": 0.030321285501122473, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0023348334711045028, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0023348334711045028, "signal/accgated_coverage_25/centered_abs_mean": 0.01600625291466713, "signal/accgated_coverage_25/group_std_mean": 0.020638634264469147, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0016006252961233258, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0016006252961233258, "signal/accgated_coverage_5/centered_abs_mean": 0.04443902298808098, "signal/accgated_coverage_5/group_std_mean": 0.056985524296760556, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004443902382627129, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004443902382627129, "signal/accuracy_reward/centered_abs_mean": 0.095660400390625, "signal/accuracy_reward/group_std_mean": 0.12655805945396423, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0478302001953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0478302001953125, "signal/advantage_abs_mean": 0.06486985236406326, "signal/advantage_pre_scale_abs_mean": 0.06486985236406326, "signal/advantage_pre_scale_std": 0.10080467015504838, "signal/advantage_std": 0.10080467015504838, "signal/brier_reward/centered_abs_mean": 0.10289306491613388, "signal/brier_reward/group_std_mean": 0.1338332325220108, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010289306752383709, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010289306752383709, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014014300890266895, "signal/confidence_uniqueness_reward/group_std_mean": 0.019804118014872075, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014014300424605608, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014014300424605608, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417306780815, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026059220312163235, "signal/frontier_aurc_reward/group_std_mean": 0.004272950720041991, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2574026408838105e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2574026408838105e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00430595139041543, "signal/frontier_ece_reward/group_std_mean": 0.005667751654982567, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043059513554908333, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043059513554908333, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29276729822158815, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3635729193687439, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02927673012018204, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02927673012018204, "step": 305 }, { "calibration/aurc": 0.3662020936085089, "calibration/batch_distribution_entropy": 0.9741297166815833, "calibration/buffer_distribution_entropy": 0.9956483661118323, "calibration/confidence_entropy": 0.47563083944507, "calibration/coverage@0%": 0.003520220588235294, "calibration/coverage@1%": 0.003520220588235294, "calibration/coverage@10%": 0.05664522058823529, "calibration/coverage@15%": 0.09883272058823529, "calibration/coverage@20%": 0.15978400735294118, "calibration/coverage@25%": 0.28942555147058824, "calibration/coverage@30%": 0.3864047181372549, "calibration/coverage@5%": 0.003520220588235294, "calibration/ece": 0.13104297770085743, "calibration/mean_confidence": 0.4632078044857339, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 685.2, "completions/max_terminated_length": 685.2, "completions/mean_length": 199.6486328125, "completions/mean_terminated_length": 199.74756469726563, "completions/min_length": 39.0, "completions/min_terminated_length": 97.6, "epoch": 0.992, "grad_norm": 0.0008076268131844699, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 1052896771.0, "reward": 0.9218993306159973, "reward_std": 0.08259947896003723, "rewards/accgated_coverage_0": 0.02501909025013447, "rewards/accgated_coverage_1": 0.02501909025013447, "rewards/accgated_coverage_10": 0.024991927668452263, "rewards/accgated_coverage_15": 0.02396598644554615, "rewards/accgated_coverage_20": 0.017341459915041924, "rewards/accgated_coverage_25": 0.015913099609315397, "rewards/accgated_coverage_5": 0.02501909025013447, "rewards/accuracy_reward": 0.511328125, "rewards/brier_reward": 0.8048971056938171, "rewards/confidence_uniqueness_reward": 0.9490490436553956, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.002771574491634965, "rewards/frontier_ece_reward": 0.002409780048765242, "rewards/frontier_entropy_batch_reward": -0.24799684882164003, "signal/accgated_coverage_0/centered_abs_mean": 0.04057878255844116, "signal/accgated_coverage_0/group_std_mean": 0.05137483105063438, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004057878255844116, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004057878255844116, "signal/accgated_coverage_1/centered_abs_mean": 0.04057878255844116, "signal/accgated_coverage_1/group_std_mean": 0.05137483105063438, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004057878255844116, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004057878255844116, "signal/accgated_coverage_10/centered_abs_mean": 0.040538350492715834, "signal/accgated_coverage_10/group_std_mean": 0.05132369995117188, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004053835105150938, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004053835105150938, "signal/accgated_coverage_15/centered_abs_mean": 0.03887024968862533, "signal/accgated_coverage_15/group_std_mean": 0.04922617822885513, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0038870248012244702, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0038870248012244702, "signal/accgated_coverage_20/centered_abs_mean": 0.021970576792955398, "signal/accgated_coverage_20/group_std_mean": 0.02805563621222973, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0021970578003674746, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0021970578003674746, "signal/accgated_coverage_25/centered_abs_mean": 0.015556910447776318, "signal/accgated_coverage_25/group_std_mean": 0.0197615884244442, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015556911006569863, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015556911006569863, "signal/accgated_coverage_5/centered_abs_mean": 0.04057878255844116, "signal/accgated_coverage_5/group_std_mean": 0.05137483105063438, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004057878255844116, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004057878255844116, "signal/accuracy_reward/centered_abs_mean": 0.08858642578125, "signal/accuracy_reward/group_std_mean": 0.11626765429973603, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044293212890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044293212890625, "signal/advantage_abs_mean": 0.06369578093290329, "signal/advantage_pre_scale_abs_mean": 0.06369578093290329, "signal/advantage_pre_scale_std": 0.1002244383096695, "signal/advantage_std": 0.1002244383096695, "signal/brier_reward/centered_abs_mean": 0.10709832906723023, "signal/brier_reward/group_std_mean": 0.1369374841451645, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010709832608699798, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010709832608699798, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014920119382441043, "signal/confidence_uniqueness_reward/group_std_mean": 0.02039230614900589, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014920119661837815, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014920119661837815, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.00225202739238739, "signal/frontier_aurc_reward/group_std_mean": 0.0036956620868295433, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8150342404842377e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8150342404842377e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.0042619360610842705, "signal/frontier_ece_reward/group_std_mean": 0.0056199970655143264, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042619362357072533, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042619362357072533, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2883952736854553, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3603264093399048, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028839527815580367, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028839527815580367, "step": 310 }, { "calibration/aurc": 0.2565749108783924, "calibration/batch_distribution_entropy": 0.9558861737758754, "calibration/buffer_distribution_entropy": 0.9958182821581034, "calibration/confidence_entropy": 0.46042922907413786, "calibration/coverage@0%": 0.017578125, "calibration/coverage@1%": 0.017578125, "calibration/coverage@10%": 0.111328125, "calibration/coverage@15%": 0.1962890625, "calibration/coverage@20%": 0.3427734375, "calibration/coverage@25%": 0.5517578125, "calibration/coverage@30%": 0.640625, "calibration/coverage@5%": 0.0517578125, "calibration/ece": 0.14136155363744674, "calibration/mean_confidence": 0.606045254981874, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.5, "completions/max_terminated_length": 502.5, "completions/mean_length": 202.1562957763672, "completions/mean_terminated_length": 202.1562957763672, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.9984, "num_tokens": 1059728009.0, "reward": 0.9332478642463684, "reward_std": 0.08353905007243156, "rewards/accgated_coverage_0": 0.015768482349812984, "rewards/accgated_coverage_1": 0.015768482349812984, "rewards/accgated_coverage_10": 0.015732761472463608, "rewards/accgated_coverage_15": 0.014848333783447742, "rewards/accgated_coverage_20": 0.012655510101467371, "rewards/accgated_coverage_25": 0.013384385034441948, "rewards/accgated_coverage_5": 0.015768482349812984, "rewards/accuracy_reward": 0.536376953125, "rewards/brier_reward": 0.7888486683368683, "rewards/confidence_uniqueness_reward": 0.9530830383300781, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0028738367836922407, "rewards/frontier_ece_reward": 0.002294275094754994, "rewards/frontier_entropy_batch_reward": -0.19719929993152618, "signal/accgated_coverage_0/centered_abs_mean": 0.043060190975666046, "signal/accgated_coverage_0/group_std_mean": 0.05668780021369457, "signal/accgated_coverage_0/group_zero_std_frac": 0.0, "signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004306019051000476, "signal/accgated_coverage_0/weight": 0.10000000149011612, "signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004306019051000476, "signal/accgated_coverage_1/centered_abs_mean": 0.043060190975666046, "signal/accgated_coverage_1/group_std_mean": 0.05668780021369457, "signal/accgated_coverage_1/group_zero_std_frac": 0.0, "signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004306019051000476, "signal/accgated_coverage_1/weight": 0.10000000149011612, "signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004306019051000476, "signal/accgated_coverage_10/centered_abs_mean": 0.04302673973143101, "signal/accgated_coverage_10/group_std_mean": 0.056644506752491, "signal/accgated_coverage_10/group_zero_std_frac": 0.0, "signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004302673973143101, "signal/accgated_coverage_10/weight": 0.10000000149011612, "signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004302673973143101, "signal/accgated_coverage_15/centered_abs_mean": 0.04146258533000946, "signal/accgated_coverage_15/group_std_mean": 0.05463242903351784, "signal/accgated_coverage_15/group_zero_std_frac": 0.0, "signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004146258695982397, "signal/accgated_coverage_15/weight": 0.10000000149011612, "signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004146258695982397, "signal/accgated_coverage_20/centered_abs_mean": 0.02249743789434433, "signal/accgated_coverage_20/group_std_mean": 0.0300173107534647, "signal/accgated_coverage_20/group_zero_std_frac": 0.0, "signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002249743905849755, "signal/accgated_coverage_20/weight": 0.10000000149011612, "signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002249743905849755, "signal/accgated_coverage_25/centered_abs_mean": 0.015130959451198578, "signal/accgated_coverage_25/group_std_mean": 0.01980656199157238, "signal/accgated_coverage_25/group_zero_std_frac": 0.0, "signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0015130960382521152, "signal/accgated_coverage_25/weight": 0.10000000149011612, "signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0015130960382521152, "signal/accgated_coverage_5/centered_abs_mean": 0.043060190975666046, "signal/accgated_coverage_5/group_std_mean": 0.05668780021369457, "signal/accgated_coverage_5/group_zero_std_frac": 0.0, "signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004306019051000476, "signal/accgated_coverage_5/weight": 0.10000000149011612, "signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004306019051000476, "signal/accuracy_reward/centered_abs_mean": 0.0918731689453125, "signal/accuracy_reward/group_std_mean": 0.12753162533044815, "signal/accuracy_reward/group_zero_std_frac": 0.6171875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04593658447265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04593658447265625, "signal/advantage_abs_mean": 0.06494300253689289, "signal/advantage_pre_scale_abs_mean": 0.06494300253689289, "signal/advantage_pre_scale_std": 0.10036676377058029, "signal/advantage_std": 0.10036676377058029, "signal/brier_reward/centered_abs_mean": 0.10487185046076775, "signal/brier_reward/group_std_mean": 0.13467831909656525, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010487185325473547, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010487185325473547, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011093974113464355, "signal/confidence_uniqueness_reward/group_std_mean": 0.013951313681900501, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001109397446271032, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001109397446271032, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002461986499838531, "signal/frontier_aurc_reward/group_std_mean": 0.004381507635116577, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.077483233937528e-05, "signal/frontier_aurc_reward/weight": 0.012500000186264515, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.077483233937528e-05, "signal/frontier_ece_reward/centered_abs_mean": 0.00429272442124784, "signal/frontier_ece_reward/group_std_mean": 0.005649249535053968, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004292724479455501, "signal/frontier_ece_reward/weight": 0.10000000149011612, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004292724479455501, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2588946372270584, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32734930515289307, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02588946372270584, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02588946372270584, "step": 312, "total_flos": 0.0, "train_loss": -0.000582187315441656, "train_runtime": 61430.7382, "train_samples_per_second": 0.326, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1059728009, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }