Files
RLCR-v4-ks-uniqueness-cold-…/trainer_state.json
ModelHub XC 8ba1da4073 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cold-math
Source: Original Platform
2026-05-30 06:56:20 +08:00

5721 lines
354 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49919376007799904,
"eval_steps": 50,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.514086638541351,
"calibration/batch_distribution_entropy": 0.27599249583875307,
"calibration/confidence_entropy": 0.2228992812774721,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4635714419377363,
"calibration/mean_confidence": 0.9145515300719154,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0203125,
"completions/max_length": 4017.2,
"completions/max_terminated_length": 4017.2,
"completions/mean_length": 517.5268188476563,
"completions/mean_terminated_length": 528.2714599609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011999850001874977,
"grad_norm": 0.004794155713170767,
"learning_rate": 5.952380952380953e-07,
"loss": 0.008,
"num_tokens": 9076117.0,
"reward": 0.5741718530654907,
"reward_std": 0.5221742153167724,
"rewards/accuracy_reward": 0.2621527761220932,
"rewards/brier_reward": 0.31355856657028197,
"rewards/confidence_uniqueness_reward": 0.288547545671463,
"rewards/format_reward": 0.5970486044883728,
"rewards/frontier_aurc_reward": 0.27689927220344546,
"rewards/frontier_coverage_1": 0.27689927220344546,
"rewards/frontier_coverage_10": 0.27689927220344546,
"rewards/frontier_coverage_15": 0.27689927220344546,
"rewards/frontier_coverage_20": 0.27689927220344546,
"rewards/frontier_coverage_25": 0.27689927220344546,
"rewards/frontier_coverage_5": 0.27689927220344546,
"rewards/frontier_ece_reward": 0.27689927220344546,
"signal/accuracy_reward/centered_abs_mean": 0.3104600667953491,
"signal/accuracy_reward/group_std_mean": 0.3717172920703888,
"signal/accuracy_reward/group_zero_std_frac": 0.07777777910232545,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15523003339767455,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15523003339767455,
"signal/advantage_abs_mean": 0.4487810075283051,
"signal/advantage_pre_scale_abs_mean": 0.4487810075283051,
"signal/advantage_pre_scale_std": 0.5276062607765197,
"signal/advantage_std": 0.5276062607765197,
"signal/brier_reward/centered_abs_mean": 0.32066049575805666,
"signal/brier_reward/group_std_mean": 0.3748934090137482,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04008256196975708,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.04008256196975708,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2354918897151947,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2874836504459381,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029436486214399336,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029436486214399336,
"signal/format_reward/centered_abs_mean": 0.4400065064430237,
"signal/format_reward/group_std_mean": 0.47453489899635315,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.22000325322151185,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.22000325322151185,
"signal/frontier_aurc_reward/centered_abs_mean": 0.31218199133872987,
"signal/frontier_aurc_reward/group_std_mean": 0.3707219660282135,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_1/centered_abs_mean": 0.31218199133872987,
"signal/frontier_coverage_1/group_std_mean": 0.3707219660282135,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_10/centered_abs_mean": 0.31218199133872987,
"signal/frontier_coverage_10/group_std_mean": 0.3707219660282135,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_15/centered_abs_mean": 0.31218199133872987,
"signal/frontier_coverage_15/group_std_mean": 0.3707219660282135,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_20/centered_abs_mean": 0.31218199133872987,
"signal/frontier_coverage_20/group_std_mean": 0.3707219660282135,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_25/centered_abs_mean": 0.31218199133872987,
"signal/frontier_coverage_25/group_std_mean": 0.3707219660282135,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_5/centered_abs_mean": 0.31218199133872987,
"signal/frontier_coverage_5/group_std_mean": 0.3707219660282135,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005588056985288858,
"signal/frontier_ece_reward/centered_abs_mean": 0.31218199133872987,
"signal/frontier_ece_reward/group_std_mean": 0.3707219660282135,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.039022748917341234,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.039022748917341234,
"step": 5
},
{
"calibration/aurc": 0.5280547395080012,
"calibration/batch_distribution_entropy": 0.25912112616058935,
"calibration/confidence_entropy": 0.22271742129617608,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.48657832278132584,
"calibration/mean_confidence": 0.921991283906116,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017881944444444443,
"completions/max_length": 4016.4,
"completions/max_terminated_length": 4016.4,
"completions/mean_length": 476.5828063964844,
"completions/mean_terminated_length": 485.360302734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 7.4,
"epoch": 0.023999700003749954,
"grad_norm": 0.024597520008683205,
"learning_rate": 1.1904761904761906e-06,
"loss": 0.0019,
"num_tokens": 17649071.0,
"reward": 0.667762839794159,
"reward_std": 0.48139882683753965,
"rewards/accuracy_reward": 0.2855034708976746,
"rewards/brier_reward": 0.35117203593254087,
"rewards/confidence_uniqueness_reward": 0.3601065635681152,
"rewards/format_reward": 0.7209201455116272,
"rewards/frontier_aurc_reward": 0.30220218896865847,
"rewards/frontier_coverage_1": 0.30220218896865847,
"rewards/frontier_coverage_10": 0.30220218896865847,
"rewards/frontier_coverage_15": 0.30220218896865847,
"rewards/frontier_coverage_20": 0.30220218896865847,
"rewards/frontier_coverage_25": 0.30220218896865847,
"rewards/frontier_coverage_5": 0.30220218896865847,
"rewards/frontier_ece_reward": 0.30220218896865847,
"signal/accuracy_reward/centered_abs_mean": 0.3181260824203491,
"signal/accuracy_reward/group_std_mean": 0.37762491106987,
"signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15906304121017456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15906304121017456,
"signal/advantage_abs_mean": 0.3990109622478485,
"signal/advantage_pre_scale_abs_mean": 0.3990109622478485,
"signal/advantage_pre_scale_std": 0.4871573269367218,
"signal/advantage_std": 0.4871573269367218,
"signal/brier_reward/centered_abs_mean": 0.3128700017929077,
"signal/brier_reward/group_std_mean": 0.3669971525669098,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03910875022411346,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03910875022411346,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2221683979034424,
"signal/confidence_uniqueness_reward/group_std_mean": 0.27797139883041383,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0277710497379303,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0277710497379303,
"signal/format_reward/centered_abs_mean": 0.34768337607383726,
"signal/format_reward/group_std_mean": 0.41295074224472045,
"signal/format_reward/group_zero_std_frac": 0.01388888917863369,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17384168803691863,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.17384168803691863,
"signal/frontier_aurc_reward/centered_abs_mean": 0.31191812753677367,
"signal/frontier_aurc_reward/group_std_mean": 0.36915679574012755,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_1/centered_abs_mean": 0.31191812753677367,
"signal/frontier_coverage_1/group_std_mean": 0.36915679574012755,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_10/centered_abs_mean": 0.31191812753677367,
"signal/frontier_coverage_10/group_std_mean": 0.36915679574012755,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_15/centered_abs_mean": 0.31191812753677367,
"signal/frontier_coverage_15/group_std_mean": 0.36915679574012755,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_20/centered_abs_mean": 0.31191812753677367,
"signal/frontier_coverage_20/group_std_mean": 0.36915679574012755,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_25/centered_abs_mean": 0.31191812753677367,
"signal/frontier_coverage_25/group_std_mean": 0.36915679574012755,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_5/centered_abs_mean": 0.31191812753677367,
"signal/frontier_coverage_5/group_std_mean": 0.36915679574012755,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0055833343416452404,
"signal/frontier_ece_reward/centered_abs_mean": 0.31191812753677367,
"signal/frontier_ece_reward/group_std_mean": 0.36915679574012755,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03898976594209671,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03898976594209671,
"step": 10
},
{
"calibration/aurc": 0.567845924200485,
"calibration/batch_distribution_entropy": 0.3045943413639345,
"calibration/confidence_entropy": 0.24763369243483666,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5280282173660021,
"calibration/mean_confidence": 0.9096602757540054,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009895833333333326,
"completions/max_length": 3921.8,
"completions/max_terminated_length": 3921.8,
"completions/mean_length": 413.9700561523438,
"completions/mean_terminated_length": 418.13662109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 58.4,
"epoch": 0.03599955000562493,
"grad_norm": 0.0017141081625595689,
"learning_rate": 1.7857142857142859e-06,
"loss": -0.0112,
"num_tokens": 25519990.0,
"reward": 0.8229876399040222,
"reward_std": 0.37621534466743467,
"rewards/accuracy_reward": 0.3091145813465118,
"rewards/brier_reward": 0.4167811870574951,
"rewards/confidence_uniqueness_reward": 0.5070087611675262,
"rewards/format_reward": 0.9366319298744201,
"rewards/frontier_aurc_reward": 0.338156646490097,
"rewards/frontier_coverage_1": 0.338156646490097,
"rewards/frontier_coverage_10": 0.338156646490097,
"rewards/frontier_coverage_15": 0.338156646490097,
"rewards/frontier_coverage_20": 0.338156646490097,
"rewards/frontier_coverage_25": 0.338156646490097,
"rewards/frontier_coverage_5": 0.338156646490097,
"rewards/frontier_ece_reward": 0.338156646490097,
"signal/accuracy_reward/centered_abs_mean": 0.32050238847732543,
"signal/accuracy_reward/group_std_mean": 0.37917629480361936,
"signal/accuracy_reward/group_zero_std_frac": 0.07777778059244156,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16025119423866271,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16025119423866271,
"signal/advantage_abs_mean": 0.30312079191207886,
"signal/advantage_pre_scale_abs_mean": 0.30312079191207886,
"signal/advantage_pre_scale_std": 0.3861180067062378,
"signal/advantage_std": 0.3861180067062378,
"signal/brier_reward/centered_abs_mean": 0.29925207495689393,
"signal/brier_reward/group_std_mean": 0.35090850591659545,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03740650936961174,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03740650936961174,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18491619527339936,
"signal/confidence_uniqueness_reward/group_std_mean": 0.23462865352630616,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02311452440917492,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02311452440917492,
"signal/format_reward/centered_abs_mean": 0.10967882126569747,
"signal/format_reward/group_std_mean": 0.19272871911525727,
"signal/format_reward/group_zero_std_frac": 0.2833333410322666,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05483941063284874,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.05483941063284874,
"signal/frontier_aurc_reward/centered_abs_mean": 0.3117563307285309,
"signal/frontier_aurc_reward/group_std_mean": 0.36737927198410036,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_1/centered_abs_mean": 0.3117563307285309,
"signal/frontier_coverage_1/group_std_mean": 0.36737927198410036,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_10/centered_abs_mean": 0.3117563307285309,
"signal/frontier_coverage_10/group_std_mean": 0.36737927198410036,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_15/centered_abs_mean": 0.3117563307285309,
"signal/frontier_coverage_15/group_std_mean": 0.36737927198410036,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_20/centered_abs_mean": 0.3117563307285309,
"signal/frontier_coverage_20/group_std_mean": 0.36737927198410036,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_25/centered_abs_mean": 0.3117563307285309,
"signal/frontier_coverage_25/group_std_mean": 0.36737927198410036,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_5/centered_abs_mean": 0.3117563307285309,
"signal/frontier_coverage_5/group_std_mean": 0.36737927198410036,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005580438114702701,
"signal/frontier_ece_reward/centered_abs_mean": 0.3117563307285309,
"signal/frontier_ece_reward/group_std_mean": 0.36737927198410036,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03896954134106636,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03896954134106636,
"step": 15
},
{
"calibration/aurc": 0.47143046891943,
"calibration/batch_distribution_entropy": 0.4095787632848159,
"calibration/buffer_distribution_entropy": 0.3143981234334837,
"calibration/confidence_entropy": 0.31617773106851604,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.39434792860496753,
"calibration/mean_confidence": 0.8800821491423632,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009375,
"completions/max_length": 4021.8,
"completions/max_terminated_length": 4021.8,
"completions/mean_length": 432.70338134765626,
"completions/mean_terminated_length": 436.8342529296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.04799940000749991,
"grad_norm": 0.001050017075613141,
"learning_rate": 2.380952380952381e-06,
"loss": -0.0105,
"num_tokens": 33618429.0,
"reward": 0.8896676540374756,
"reward_std": 0.28481470942497256,
"rewards/accuracy_reward": 0.4251736104488373,
"rewards/brier_reward": 0.5469748020172119,
"rewards/confidence_uniqueness_reward": 0.5929476499557496,
"rewards/format_reward": 0.985937488079071,
"rewards/frontier_aurc_reward": 0.16746631124988198,
"rewards/frontier_coverage_1": 0.1779847363010049,
"rewards/frontier_coverage_10": 0.1779847363010049,
"rewards/frontier_coverage_15": 0.1779847363010049,
"rewards/frontier_coverage_20": 0.1779847363010049,
"rewards/frontier_coverage_25": 0.1779847363010049,
"rewards/frontier_coverage_5": 0.1779847363010049,
"rewards/frontier_ece_reward": 0.15606855656951665,
"signal/accuracy_reward/centered_abs_mean": 0.2997070372104645,
"signal/accuracy_reward/group_std_mean": 0.3657579779624939,
"signal/accuracy_reward/group_zero_std_frac": 0.08055555820465088,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14985351860523224,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14985351860523224,
"signal/advantage_abs_mean": 0.2284111499786377,
"signal/advantage_pre_scale_abs_mean": 0.2284111499786377,
"signal/advantage_pre_scale_std": 0.2946593701839447,
"signal/advantage_std": 0.2946593701839447,
"signal/brier_reward/centered_abs_mean": 0.2612148314714432,
"signal/brier_reward/group_std_mean": 0.31739285588264465,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0326518539339304,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0326518539339304,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.186856010556221,
"signal/confidence_uniqueness_reward/group_std_mean": 0.22159543633460999,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023357001319527625,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023357001319527625,
"signal/format_reward/centered_abs_mean": 0.02544487789273262,
"signal/format_reward/group_std_mean": 0.05656049475073814,
"signal/format_reward/group_zero_std_frac": 0.7361111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01272243894636631,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01272243894636631,
"signal/frontier_aurc_reward/centered_abs_mean": 0.11768018077127636,
"signal/frontier_aurc_reward/group_std_mean": 0.14374305196106435,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0021064750850200652,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0021064750850200652,
"signal/frontier_coverage_1/centered_abs_mean": 0.13446774668991565,
"signal/frontier_coverage_1/group_std_mean": 0.17186392471194267,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_10/centered_abs_mean": 0.13446774668991565,
"signal/frontier_coverage_10/group_std_mean": 0.17186392471194267,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_15/centered_abs_mean": 0.13446774668991565,
"signal/frontier_coverage_15/group_std_mean": 0.17186392471194267,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_20/centered_abs_mean": 0.13446774668991565,
"signal/frontier_coverage_20/group_std_mean": 0.17186392471194267,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_25/centered_abs_mean": 0.13446774668991565,
"signal/frontier_coverage_25/group_std_mean": 0.17186392471194267,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_5/centered_abs_mean": 0.13446774668991565,
"signal/frontier_coverage_5/group_std_mean": 0.17186392471194267,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024069725011941047,
"signal/frontier_ece_reward/centered_abs_mean": 0.2141528308391571,
"signal/frontier_ece_reward/group_std_mean": 0.2606072276830673,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.026769103854894637,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.026769103854894637,
"step": 20
},
{
"calibration/aurc": 0.36235694806855867,
"calibration/batch_distribution_entropy": 0.5577564400694477,
"calibration/buffer_distribution_entropy": 0.3620251114970633,
"calibration/confidence_entropy": 0.3781988685889721,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.020526315789473684,
"calibration/coverage@15%": 0.020526315789473684,
"calibration/coverage@20%": 0.07789473684210527,
"calibration/coverage@25%": 0.1470138210247103,
"calibration/coverage@30%": 0.39850523935370885,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.27206986145822254,
"calibration/mean_confidence": 0.8404789311929516,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00980902777777779,
"completions/max_length": 3977.8,
"completions/max_terminated_length": 3977.8,
"completions/mean_length": 482.4750061035156,
"completions/mean_terminated_length": 487.28203735351565,
"completions/min_length": 0.0,
"completions/min_terminated_length": 79.6,
"epoch": 0.05999925000937488,
"grad_norm": 0.020144827663898468,
"learning_rate": 2.9761904761904763e-06,
"loss": -0.0045,
"num_tokens": 42300989.0,
"reward": 0.9261262536048889,
"reward_std": 0.2307106077671051,
"rewards/accuracy_reward": 0.5281249940395355,
"rewards/brier_reward": 0.6501283884048462,
"rewards/confidence_uniqueness_reward": 0.6791275620460511,
"rewards/format_reward": 0.9868055582046509,
"rewards/frontier_aurc_reward": -0.004348812019452452,
"rewards/frontier_coverage_1": 0.00460605913103791,
"rewards/frontier_coverage_10": 0.00460605913103791,
"rewards/frontier_coverage_15": 0.00460605913103791,
"rewards/frontier_coverage_20": 0.00460605913103791,
"rewards/frontier_coverage_25": 0.00460605913103791,
"rewards/frontier_coverage_5": 0.00460605913103791,
"rewards/frontier_ece_reward": 0.01669727308326401,
"signal/accuracy_reward/centered_abs_mean": 0.2728081583976746,
"signal/accuracy_reward/group_std_mean": 0.33785536885261536,
"signal/accuracy_reward/group_zero_std_frac": 0.12222222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1364040791988373,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1364040791988373,
"signal/advantage_abs_mean": 0.1812939614057541,
"signal/advantage_pre_scale_abs_mean": 0.1812939614057541,
"signal/advantage_pre_scale_std": 0.24797289669513703,
"signal/advantage_std": 0.24797289669513703,
"signal/brier_reward/centered_abs_mean": 0.21097786724567413,
"signal/brier_reward/group_std_mean": 0.2636861175298691,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026372233405709267,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.026372233405709267,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1371775045990944,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1669593095779419,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0171471880748868,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0171471880748868,
"signal/format_reward/centered_abs_mean": 0.023567708767950536,
"signal/format_reward/group_std_mean": 0.047248493134975436,
"signal/format_reward/group_zero_std_frac": 0.7972222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011783854383975268,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011783854383975268,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028534052427858113,
"signal/frontier_aurc_reward/group_std_mean": 0.004276081500574946,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1075952796963975e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1075952796963975e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.03978681042790413,
"signal/frontier_coverage_1/group_std_mean": 0.06365430131554603,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_10/centered_abs_mean": 0.03978681042790413,
"signal/frontier_coverage_10/group_std_mean": 0.06365430131554603,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_15/centered_abs_mean": 0.03978681042790413,
"signal/frontier_coverage_15/group_std_mean": 0.06365430131554603,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_20/centered_abs_mean": 0.03978681042790413,
"signal/frontier_coverage_20/group_std_mean": 0.06365430131554603,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_25/centered_abs_mean": 0.03978681042790413,
"signal/frontier_coverage_25/group_std_mean": 0.06365430131554603,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_5/centered_abs_mean": 0.03978681042790413,
"signal/frontier_coverage_5/group_std_mean": 0.06365430131554603,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007121838862076402,
"signal/frontier_ece_reward/centered_abs_mean": 0.1267393171787262,
"signal/frontier_ece_reward/group_std_mean": 0.1571869283914566,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.015842414647340774,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.015842414647340774,
"step": 25
},
{
"calibration/aurc": 0.27921286600632367,
"calibration/batch_distribution_entropy": 0.6577298107673348,
"calibration/buffer_distribution_entropy": 0.44022902249815105,
"calibration/confidence_entropy": 0.47141779391590316,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.007065217391304347,
"calibration/coverage@15%": 0.08577783589696426,
"calibration/coverage@20%": 0.1485360821538392,
"calibration/coverage@25%": 0.23173721030472935,
"calibration/coverage@30%": 0.5571452261540231,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.13807863251820546,
"calibration/mean_confidence": 0.786612541696696,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017187499999999977,
"completions/max_length": 4053.8,
"completions/max_terminated_length": 4053.8,
"completions/mean_length": 555.9796997070313,
"completions/mean_terminated_length": 565.7698974609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 118.4,
"epoch": 0.07199910001124986,
"grad_norm": 0.0005177477723918855,
"learning_rate": 3.5714285714285718e-06,
"loss": -0.011,
"num_tokens": 51815795.0,
"reward": 0.9665445923805237,
"reward_std": 0.20279234647750854,
"rewards/accuracy_reward": 0.5907986044883728,
"rewards/brier_reward": 0.7121957659721374,
"rewards/confidence_uniqueness_reward": 0.7083804368972778,
"rewards/format_reward": 0.9802083253860474,
"rewards/frontier_aurc_reward": -0.0033660000655800102,
"rewards/frontier_coverage_1": -0.005025790445506573,
"rewards/frontier_coverage_10": -0.005025790445506573,
"rewards/frontier_coverage_15": -0.005025790445506573,
"rewards/frontier_coverage_20": -0.005025790445506573,
"rewards/frontier_coverage_25": -0.005025790445506573,
"rewards/frontier_coverage_5": -0.005025790445506573,
"rewards/frontier_ece_reward": 0.032553022354841234,
"signal/accuracy_reward/centered_abs_mean": 0.2300238698720932,
"signal/accuracy_reward/group_std_mean": 0.2947371512651443,
"signal/accuracy_reward/group_zero_std_frac": 0.18888889104127884,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1150119349360466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1150119349360466,
"signal/advantage_abs_mean": 0.15309852957725525,
"signal/advantage_pre_scale_abs_mean": 0.15309852957725525,
"signal/advantage_pre_scale_std": 0.2273882269859314,
"signal/advantage_std": 0.2273882269859314,
"signal/brier_reward/centered_abs_mean": 0.16554278135299683,
"signal/brier_reward/group_std_mean": 0.21267394721508026,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020692847669124603,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020692847669124603,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11078131943941116,
"signal/confidence_uniqueness_reward/group_std_mean": 0.14092794060707092,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013847664929926395,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013847664929926395,
"signal/format_reward/centered_abs_mean": 0.03274739608168602,
"signal/format_reward/group_std_mean": 0.06030413955450058,
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01637369804084301,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01637369804084301,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018913287669420243,
"signal/frontier_aurc_reward/group_std_mean": 0.0029469260945916174,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.385478412383236e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.385478412383236e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.05050327777862549,
"signal/frontier_coverage_1/group_std_mean": 0.07173062860965729,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_10/centered_abs_mean": 0.05050327777862549,
"signal/frontier_coverage_10/group_std_mean": 0.07173062860965729,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_15/centered_abs_mean": 0.05050327777862549,
"signal/frontier_coverage_15/group_std_mean": 0.07173062860965729,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_20/centered_abs_mean": 0.05050327777862549,
"signal/frontier_coverage_20/group_std_mean": 0.07173062860965729,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_25/centered_abs_mean": 0.05050327777862549,
"signal/frontier_coverage_25/group_std_mean": 0.07173062860965729,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_5/centered_abs_mean": 0.05050327777862549,
"signal/frontier_coverage_5/group_std_mean": 0.07173062860965729,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009040086762979627,
"signal/frontier_ece_reward/centered_abs_mean": 0.08041608110070228,
"signal/frontier_ece_reward/group_std_mean": 0.10180892795324326,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010052010137587785,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010052010137587785,
"step": 30
},
{
"calibration/aurc": 0.24546937930520882,
"calibration/batch_distribution_entropy": 0.7074476039153248,
"calibration/buffer_distribution_entropy": 0.5221484411007967,
"calibration/confidence_entropy": 0.5234833271299201,
"calibration/coverage@0%": 0.003183023872679045,
"calibration/coverage@1%": 0.003183023872679045,
"calibration/coverage@10%": 0.022281167108753316,
"calibration/coverage@15%": 0.13367375062180317,
"calibration/coverage@20%": 0.2989711554240587,
"calibration/coverage@25%": 0.547051785170009,
"calibration/coverage@30%": 0.8145723684210526,
"calibration/coverage@5%": 0.022281167108753316,
"calibration/ece": 0.09596886211635568,
"calibration/mean_confidence": 0.7358937641480917,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01571180555555558,
"completions/max_length": 4022.0,
"completions/max_terminated_length": 4022.0,
"completions/mean_length": 609.8512084960937,
"completions/mean_terminated_length": 619.6409912109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 151.6,
"epoch": 0.08399895001312484,
"grad_norm": 0.0005236866418272257,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0102,
"num_tokens": 61918721.0,
"reward": 0.9899388790130615,
"reward_std": 0.1820593684911728,
"rewards/accuracy_reward": 0.6299479126930236,
"rewards/brier_reward": 0.7458477735519409,
"rewards/confidence_uniqueness_reward": 0.7157063841819763,
"rewards/format_reward": 0.98046875,
"rewards/frontier_aurc_reward": -0.002861540112644434,
"rewards/frontier_coverage_1": -0.013961865846067668,
"rewards/frontier_coverage_10": -0.013961865846067668,
"rewards/frontier_coverage_15": -0.013961865846067668,
"rewards/frontier_coverage_20": -0.013961865846067668,
"rewards/frontier_coverage_25": -0.013961865846067668,
"rewards/frontier_coverage_5": -0.013961865846067668,
"rewards/frontier_ece_reward": 0.02869575172662735,
"signal/accuracy_reward/centered_abs_mean": 0.21008571982383728,
"signal/accuracy_reward/group_std_mean": 0.2665324449539185,
"signal/accuracy_reward/group_zero_std_frac": 0.2805555611848831,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10504285991191864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10504285991191864,
"signal/advantage_abs_mean": 0.13846542537212372,
"signal/advantage_pre_scale_abs_mean": 0.13846542537212372,
"signal/advantage_pre_scale_std": 0.20890699625015258,
"signal/advantage_std": 0.20890699625015258,
"signal/brier_reward/centered_abs_mean": 0.14192103445529938,
"signal/brier_reward/group_std_mean": 0.1835268259048462,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017740129306912423,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017740129306912423,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1185295969247818,
"signal/confidence_uniqueness_reward/group_std_mean": 0.14487815797328948,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.014816199615597724,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014816199615597724,
"signal/format_reward/centered_abs_mean": 0.03253580778837204,
"signal/format_reward/group_std_mean": 0.05597815439105034,
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01626790389418602,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01626790389418602,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001540156383998692,
"signal/frontier_aurc_reward/group_std_mean": 0.0023954100906848907,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.756879803200718e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.756879803200718e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.06873494014143944,
"signal/frontier_coverage_1/group_std_mean": 0.09156568795442581,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_10/centered_abs_mean": 0.06873494014143944,
"signal/frontier_coverage_10/group_std_mean": 0.09156568795442581,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_15/centered_abs_mean": 0.06873494014143944,
"signal/frontier_coverage_15/group_std_mean": 0.09156568795442581,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_20/centered_abs_mean": 0.06873494014143944,
"signal/frontier_coverage_20/group_std_mean": 0.09156568795442581,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_25/centered_abs_mean": 0.06873494014143944,
"signal/frontier_coverage_25/group_std_mean": 0.09156568795442581,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_5/centered_abs_mean": 0.06873494014143944,
"signal/frontier_coverage_5/group_std_mean": 0.09156568795442581,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012303554220125079,
"signal/frontier_ece_reward/centered_abs_mean": 0.05747309401631355,
"signal/frontier_ece_reward/group_std_mean": 0.074637171626091,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007184136752039194,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007184136752039194,
"step": 35
},
{
"calibration/aurc": 0.27279581724387775,
"calibration/batch_distribution_entropy": 0.7161346190573996,
"calibration/buffer_distribution_entropy": 0.5831439661286104,
"calibration/confidence_entropy": 0.5275154650697746,
"calibration/coverage@0%": 0.004199475065616798,
"calibration/coverage@1%": 0.004199475065616798,
"calibration/coverage@10%": 0.023622047244094488,
"calibration/coverage@15%": 0.06826086053227515,
"calibration/coverage@20%": 0.17686038278103913,
"calibration/coverage@25%": 0.3879944798344658,
"calibration/coverage@30%": 0.5670690424419453,
"calibration/coverage@5%": 0.004199475065616798,
"calibration/ece": 0.09923312232529506,
"calibration/mean_confidence": 0.7262206563584646,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014930555555555558,
"completions/max_length": 3704.8,
"completions/max_terminated_length": 3704.8,
"completions/mean_length": 634.3372436523438,
"completions/mean_terminated_length": 643.9880004882813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 165.8,
"epoch": 0.09599880001499982,
"grad_norm": 0.0005042441189289093,
"learning_rate": 4.761904761904762e-06,
"loss": -0.0111,
"num_tokens": 72345806.0,
"reward": 1.0059186458587646,
"reward_std": 0.17311942875385283,
"rewards/accuracy_reward": 0.6474826335906982,
"rewards/brier_reward": 0.7549214363098145,
"rewards/confidence_uniqueness_reward": 0.7580878973007202,
"rewards/format_reward": 0.9828124880790711,
"rewards/frontier_aurc_reward": -0.002668565092608333,
"rewards/frontier_coverage_1": -0.018375001149252057,
"rewards/frontier_coverage_10": -0.018375001149252057,
"rewards/frontier_coverage_15": -0.018375001149252057,
"rewards/frontier_coverage_20": -0.018375001149252057,
"rewards/frontier_coverage_25": -0.018375001149252057,
"rewards/frontier_coverage_5": -0.018375001149252057,
"rewards/frontier_ece_reward": 0.029329166933894157,
"signal/accuracy_reward/centered_abs_mean": 0.19369032084941865,
"signal/accuracy_reward/group_std_mean": 0.25328629910945893,
"signal/accuracy_reward/group_zero_std_frac": 0.2972222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09684516042470932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09684516042470932,
"signal/advantage_abs_mean": 0.1273159146308899,
"signal/advantage_pre_scale_abs_mean": 0.1273159146308899,
"signal/advantage_pre_scale_std": 0.2019648015499115,
"signal/advantage_std": 0.2019648015499115,
"signal/brier_reward/centered_abs_mean": 0.141173791885376,
"signal/brier_reward/group_std_mean": 0.18503097891807557,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017646723985672,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017646723985672,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10431279838085175,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13210797309875488,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013039099797606469,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013039099797606469,
"signal/format_reward/centered_abs_mean": 0.02906901091337204,
"signal/format_reward/group_std_mean": 0.05360684543848038,
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01453450545668602,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01453450545668602,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019563521724194287,
"signal/frontier_aurc_reward/group_std_mean": 0.00321835745126009,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5018703420064413e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5018703420064413e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.08069958090782166,
"signal/frontier_coverage_1/group_std_mean": 0.10802106261253357,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_10/centered_abs_mean": 0.08069958090782166,
"signal/frontier_coverage_10/group_std_mean": 0.10802106261253357,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_15/centered_abs_mean": 0.08069958090782166,
"signal/frontier_coverage_15/group_std_mean": 0.10802106261253357,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_20/centered_abs_mean": 0.08069958090782166,
"signal/frontier_coverage_20/group_std_mean": 0.10802106261253357,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_25/centered_abs_mean": 0.08069958090782166,
"signal/frontier_coverage_25/group_std_mean": 0.10802106261253357,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_5/centered_abs_mean": 0.08069958090782166,
"signal/frontier_coverage_5/group_std_mean": 0.10802106261253357,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001444522407837212,
"signal/frontier_ece_reward/centered_abs_mean": 0.05297911018133163,
"signal/frontier_ece_reward/group_std_mean": 0.07072616964578629,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006622388772666454,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006622388772666454,
"step": 40
},
{
"calibration/aurc": 0.21362556514859965,
"calibration/batch_distribution_entropy": 0.7249410130483167,
"calibration/buffer_distribution_entropy": 0.6183385204926428,
"calibration/confidence_entropy": 0.4615105516463262,
"calibration/coverage@0%": 0.015706806282722512,
"calibration/coverage@1%": 0.015706806282722512,
"calibration/coverage@10%": 0.12766332802185293,
"calibration/coverage@15%": 0.26664864557250173,
"calibration/coverage@20%": 0.44956878151185337,
"calibration/coverage@25%": 0.7303769022780143,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.015706806282722512,
"calibration/ece": 0.10484668118363438,
"calibration/mean_confidence": 0.7590440665093695,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017013888888888884,
"completions/max_length": 3743.2,
"completions/max_terminated_length": 3743.2,
"completions/mean_length": 654.5295166015625,
"completions/mean_terminated_length": 665.859033203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.1079986500168748,
"grad_norm": 0.00047160135000012815,
"learning_rate": 4.909638554216868e-06,
"loss": -0.0147,
"num_tokens": 83021250.0,
"reward": 1.0147064447402954,
"reward_std": 0.1708086371421814,
"rewards/accuracy_reward": 0.6501736164093017,
"rewards/brier_reward": 0.7591644525527954,
"rewards/confidence_uniqueness_reward": 0.8012210249900817,
"rewards/format_reward": 0.9823784589767456,
"rewards/frontier_aurc_reward": -0.0025879590306431056,
"rewards/frontier_coverage_1": -0.010252609569579362,
"rewards/frontier_coverage_10": -0.010252609569579362,
"rewards/frontier_coverage_15": -0.010252609569579362,
"rewards/frontier_coverage_20": -0.010252609569579362,
"rewards/frontier_coverage_25": -0.010252609569579362,
"rewards/frontier_coverage_5": -0.010252609569579362,
"rewards/frontier_ece_reward": 0.036237184703350064,
"signal/accuracy_reward/centered_abs_mean": 0.19266493022441863,
"signal/accuracy_reward/group_std_mean": 0.2547257900238037,
"signal/accuracy_reward/group_zero_std_frac": 0.2805555611848831,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09633246511220932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09633246511220932,
"signal/advantage_abs_mean": 0.12555780559778212,
"signal/advantage_pre_scale_abs_mean": 0.12555780559778212,
"signal/advantage_pre_scale_std": 0.19953626692295073,
"signal/advantage_std": 0.19953626692295073,
"signal/brier_reward/centered_abs_mean": 0.14807810485363007,
"signal/brier_reward/group_std_mean": 0.19255775809288025,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01850976310670376,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01850976310670376,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0964215949177742,
"signal/confidence_uniqueness_reward/group_std_mean": 0.12510152906179428,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012052699364721775,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012052699364721775,
"signal/format_reward/centered_abs_mean": 0.02882486991584301,
"signal/format_reward/group_std_mean": 0.05173756778240204,
"signal/format_reward/group_zero_std_frac": 0.794444453716278,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014412434957921504,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014412434957921504,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028790227603167295,
"signal/frontier_aurc_reward/group_std_mean": 0.004534664563834667,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.153450401849113e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.153450401849113e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.08745122700929642,
"signal/frontier_coverage_1/group_std_mean": 0.11857426017522812,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_10/centered_abs_mean": 0.08745122700929642,
"signal/frontier_coverage_10/group_std_mean": 0.11857426017522812,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_15/centered_abs_mean": 0.08745122700929642,
"signal/frontier_coverage_15/group_std_mean": 0.11857426017522812,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_20/centered_abs_mean": 0.08745122700929642,
"signal/frontier_coverage_20/group_std_mean": 0.11857426017522812,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_25/centered_abs_mean": 0.08745122700929642,
"signal/frontier_coverage_25/group_std_mean": 0.11857426017522812,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_5/centered_abs_mean": 0.08745122700929642,
"signal/frontier_coverage_5/group_std_mean": 0.11857426017522812,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015653769718483091,
"signal/frontier_ece_reward/centered_abs_mean": 0.05844959244132042,
"signal/frontier_ece_reward/group_std_mean": 0.07468613833189011,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007306199055165052,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007306199055165052,
"step": 45
},
{
"calibration/aurc": 0.3984856338347365,
"calibration/batch_distribution_entropy": 0.7660177852564631,
"calibration/buffer_distribution_entropy": 0.6392927797521206,
"calibration/confidence_entropy": 0.4476935003558987,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.02168021680216802,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.22976067826983196,
"calibration/mean_confidence": 0.7545236404911386,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015885416666666673,
"completions/max_length": 3047.4,
"completions/max_terminated_length": 3047.4,
"completions/mean_length": 653.1490600585937,
"completions/mean_terminated_length": 663.627978515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.2,
"epoch": 0.11999850001874976,
"grad_norm": 0.0005696824518963695,
"learning_rate": 4.759036144578314e-06,
"loss": -0.0146,
"num_tokens": 93643127.0,
"reward": 1.014067542552948,
"reward_std": 0.163968026638031,
"rewards/accuracy_reward": 0.6374131798744201,
"rewards/brier_reward": 0.7471580624580383,
"rewards/confidence_uniqueness_reward": 0.8459334373474121,
"rewards/format_reward": 0.9841145873069763,
"rewards/frontier_aurc_reward": -0.002862738911062479,
"rewards/frontier_coverage_1": -0.0022450896329246463,
"rewards/frontier_coverage_10": -0.0022450896329246463,
"rewards/frontier_coverage_15": -0.0022450896329246463,
"rewards/frontier_coverage_20": -0.0022450896329246463,
"rewards/frontier_coverage_25": -0.0022450896329246463,
"rewards/frontier_coverage_5": -0.0022450896329246463,
"rewards/frontier_ece_reward": 0.03567677363753319,
"signal/accuracy_reward/centered_abs_mean": 0.181884765625,
"signal/accuracy_reward/group_std_mean": 0.24014606773853303,
"signal/accuracy_reward/group_zero_std_frac": 0.3194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0909423828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0909423828125,
"signal/advantage_abs_mean": 0.12030979841947556,
"signal/advantage_pre_scale_abs_mean": 0.12030979841947556,
"signal/advantage_pre_scale_std": 0.19647813737392425,
"signal/advantage_std": 0.19647813737392425,
"signal/brier_reward/centered_abs_mean": 0.15161194503307343,
"signal/brier_reward/group_std_mean": 0.1975580185651779,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01895149312913418,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01895149312913418,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08682139664888382,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11508260518312455,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010852674581110477,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010852674581110477,
"signal/format_reward/centered_abs_mean": 0.02594943605363369,
"signal/format_reward/group_std_mean": 0.048407307267189024,
"signal/format_reward/group_zero_std_frac": 0.7972222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012974718026816845,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012974718026816845,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035359882283955814,
"signal/frontier_aurc_reward/group_std_mean": 0.005427685286849737,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.329418683890254e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.329418683890254e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.08081827610731125,
"signal/frontier_coverage_1/group_std_mean": 0.11278729438781739,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_10/centered_abs_mean": 0.08081827610731125,
"signal/frontier_coverage_10/group_std_mean": 0.11278729438781739,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_15/centered_abs_mean": 0.08081827610731125,
"signal/frontier_coverage_15/group_std_mean": 0.11278729438781739,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_20/centered_abs_mean": 0.08081827610731125,
"signal/frontier_coverage_20/group_std_mean": 0.11278729438781739,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_25/centered_abs_mean": 0.08081827610731125,
"signal/frontier_coverage_25/group_std_mean": 0.11278729438781739,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_5/centered_abs_mean": 0.08081827610731125,
"signal/frontier_coverage_5/group_std_mean": 0.11278729438781739,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014466470805928112,
"signal/frontier_ece_reward/centered_abs_mean": 0.05788221508264542,
"signal/frontier_ece_reward/group_std_mean": 0.07291264832019806,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007235276885330677,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007235276885330677,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_calibration/aurc": 0.22528452463011495,
"eval_calibration/batch_distribution_entropy": 0.7327676302222469,
"eval_calibration/buffer_distribution_entropy": 0.6531455892362769,
"eval_calibration/confidence_entropy": 0.45147091813373913,
"eval_calibration/coverage@0%": 0.12339829749103944,
"eval_calibration/coverage@1%": 0.12339829749103944,
"eval_calibration/coverage@10%": 0.2075716845878136,
"eval_calibration/coverage@15%": 0.2720990143369176,
"eval_calibration/coverage@20%": 0.4371975806451613,
"eval_calibration/coverage@25%": 0.6719758064516129,
"eval_calibration/coverage@30%": 0.9375,
"eval_calibration/coverage@5%": 0.12339829749103944,
"eval_calibration/ece": 0.1648209544670118,
"eval_calibration/mean_confidence": 0.7559882185052436,
"eval_completions/clipped_ratio": 0.018229166666666668,
"eval_completions/max_length": 2276.0,
"eval_completions/max_terminated_length": 2276.0,
"eval_completions/mean_length": 635.2345784505209,
"eval_completions/mean_terminated_length": 646.9802856445312,
"eval_completions/min_length": 51.666666666666664,
"eval_completions/min_terminated_length": 214.5,
"eval_loss": 0.0,
"eval_num_tokens": 93643127.0,
"eval_reward": 1.0190295179684956,
"eval_reward_std": 0.29277849197387695,
"eval_rewards/accuracy_reward": 0.65625,
"eval_rewards/brier_reward": 0.7608515123526255,
"eval_rewards/confidence_uniqueness_reward": 0.8176768521467844,
"eval_rewards/format_reward": 0.980034718910853,
"eval_rewards/frontier_aurc_reward": -0.002395169634837657,
"eval_rewards/frontier_coverage_1": -0.006012833837303333,
"eval_rewards/frontier_coverage_10": -0.006012833837303333,
"eval_rewards/frontier_coverage_15": -0.006012833837303333,
"eval_rewards/frontier_coverage_20": -0.006012833837303333,
"eval_rewards/frontier_coverage_25": -0.006012833837303333,
"eval_rewards/frontier_coverage_5": -0.006012833837303333,
"eval_rewards/frontier_ece_reward": 0.034077832475304604,
"eval_runtime": 205.5337,
"eval_samples_per_second": 4.865,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4380425264437993,
"eval_signal/accuracy_reward/group_std_mean": 0.4743858923514684,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21902126322189966,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21902126322189966,
"eval_signal/advantage_abs_mean": 0.2553912376364072,
"eval_signal/advantage_pre_scale_abs_mean": 0.2553912376364072,
"eval_signal/advantage_pre_scale_std": 0.29115622242291767,
"eval_signal/advantage_std": 0.29115622242291767,
"eval_signal/brier_reward/centered_abs_mean": 0.22910910844802856,
"eval_signal/brier_reward/group_std_mean": 0.28499897321065265,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02863863855600357,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02863863855600357,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.10962619632482529,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.15229293455680212,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01370327454060316,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01370327454060316,
"eval_signal/format_reward/centered_abs_mean": 0.03803168454517921,
"eval_signal/format_reward/group_std_mean": 0.09500421459476154,
"eval_signal/format_reward/group_zero_std_frac": 0.5277777910232544,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.019015842272589605,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.019015842272589605,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033296199593072138,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005719099193811417,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9600197346298955e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9600197346298955e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.10264075919985771,
"eval_signal/frontier_coverage_1/group_std_mean": 0.16208957880735397,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.10264075919985771,
"eval_signal/frontier_coverage_10/group_std_mean": 0.16208957880735397,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.10264075919985771,
"eval_signal/frontier_coverage_15/group_std_mean": 0.16208957880735397,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10264075919985771,
"eval_signal/frontier_coverage_20/group_std_mean": 0.16208957880735397,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.10264075919985771,
"eval_signal/frontier_coverage_25/group_std_mean": 0.16208957880735397,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.10264075919985771,
"eval_signal/frontier_coverage_5/group_std_mean": 0.16208957880735397,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001837269403040409,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.06874044549961884,
"eval_signal/frontier_ece_reward/group_std_mean": 0.08980573217074077,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008592555687452355,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008592555687452355,
"eval_steps_per_second": 0.029,
"step": 50
},
{
"calibration/aurc": 0.30905254136426186,
"calibration/batch_distribution_entropy": 0.8075596443590124,
"calibration/buffer_distribution_entropy": 0.6633267710604588,
"calibration/confidence_entropy": 0.49110166338550343,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.3168765708000028,
"calibration/coverage@25%": 0.5018372703412074,
"calibration/coverage@30%": 0.5417322834645669,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.15189953369437614,
"calibration/mean_confidence": 0.7286333664825811,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015885416666666673,
"completions/max_length": 3439.6,
"completions/max_terminated_length": 3439.6,
"completions/mean_length": 658.5401977539062,
"completions/mean_terminated_length": 669.3446044921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 142.2,
"epoch": 0.13199835002062474,
"grad_norm": 0.00043861696030944586,
"learning_rate": 4.60843373493976e-06,
"loss": -0.0138,
"num_tokens": 104310086.0,
"reward": 1.0262351274490356,
"reward_std": 0.15907953977584838,
"rewards/accuracy_reward": 0.6447048544883728,
"rewards/brier_reward": 0.7629892230033875,
"rewards/confidence_uniqueness_reward": 0.9056627631187439,
"rewards/format_reward": 0.9841145873069763,
"rewards/frontier_aurc_reward": -0.002367356652393937,
"rewards/frontier_coverage_1": -0.0021971354028210042,
"rewards/frontier_coverage_10": -0.0021971354028210042,
"rewards/frontier_coverage_15": -0.0021971354028210042,
"rewards/frontier_coverage_20": -0.0021971354028210042,
"rewards/frontier_coverage_25": -0.0021971354028210042,
"rewards/frontier_coverage_5": -0.0021971354028210042,
"rewards/frontier_ece_reward": 0.02817784361541271,
"signal/accuracy_reward/centered_abs_mean": 0.18217773735523224,
"signal/accuracy_reward/group_std_mean": 0.23912697434425353,
"signal/accuracy_reward/group_zero_std_frac": 0.325,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09108886867761612,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09108886867761612,
"signal/advantage_abs_mean": 0.11837138533592224,
"signal/advantage_pre_scale_abs_mean": 0.11837138533592224,
"signal/advantage_pre_scale_std": 0.1914423018693924,
"signal/advantage_std": 0.1914423018693924,
"signal/brier_reward/centered_abs_mean": 0.1487715631723404,
"signal/brier_reward/group_std_mean": 0.1938774347305298,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01859644539654255,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01859644539654255,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06237577944993973,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08584694117307663,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007796972431242466,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007796972431242466,
"signal/format_reward/centered_abs_mean": 0.0235948346555233,
"signal/format_reward/group_std_mean": 0.040967592224478724,
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01179741732776165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01179741732776165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002605421328917146,
"signal/frontier_aurc_reward/group_std_mean": 0.004233243642374873,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6637040941277516e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6637040941277516e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.09448704570531845,
"signal/frontier_coverage_1/group_std_mean": 0.13038320094347,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_10/centered_abs_mean": 0.09448704570531845,
"signal/frontier_coverage_10/group_std_mean": 0.13038320094347,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_15/centered_abs_mean": 0.09448704570531845,
"signal/frontier_coverage_15/group_std_mean": 0.13038320094347,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_20/centered_abs_mean": 0.09448704570531845,
"signal/frontier_coverage_20/group_std_mean": 0.13038320094347,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_25/centered_abs_mean": 0.09448704570531845,
"signal/frontier_coverage_25/group_std_mean": 0.13038320094347,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_5/centered_abs_mean": 0.09448704570531845,
"signal/frontier_coverage_5/group_std_mean": 0.13038320094347,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001691318047232926,
"signal/frontier_ece_reward/centered_abs_mean": 0.04688545688986778,
"signal/frontier_ece_reward/group_std_mean": 0.06142409965395927,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005860682111233473,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005860682111233473,
"step": 55
},
{
"calibration/aurc": 0.3345369982847062,
"calibration/batch_distribution_entropy": 0.7979723488465243,
"calibration/buffer_distribution_entropy": 0.6829944006341414,
"calibration/confidence_entropy": 0.4384303562223697,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.011548556430446194,
"calibration/coverage@15%": 0.2143167650918635,
"calibration/coverage@20%": 0.342257217847769,
"calibration/coverage@25%": 0.3931758530183727,
"calibration/coverage@30%": 0.43368983957219254,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.19744679022940095,
"calibration/mean_confidence": 0.7565729020638163,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01796875,
"completions/max_length": 3591.8,
"completions/max_terminated_length": 3591.8,
"completions/mean_length": 652.4442749023438,
"completions/mean_terminated_length": 664.4763916015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.8,
"epoch": 0.14399820002249972,
"grad_norm": 0.0004398068122100085,
"learning_rate": 4.457831325301205e-06,
"loss": -0.0164,
"num_tokens": 114922820.0,
"reward": 1.0199653863906861,
"reward_std": 0.1711801379919052,
"rewards/accuracy_reward": 0.6309895873069763,
"rewards/brier_reward": 0.7476680040359497,
"rewards/confidence_uniqueness_reward": 0.9258608222007751,
"rewards/format_reward": 0.9817708253860473,
"rewards/frontier_aurc_reward": -0.002807429013773799,
"rewards/frontier_coverage_1": 0.004628715617582202,
"rewards/frontier_coverage_10": 0.004628715617582202,
"rewards/frontier_coverage_15": 0.004628715617582202,
"rewards/frontier_coverage_20": 0.004628715617582202,
"rewards/frontier_coverage_25": 0.004628715617582202,
"rewards/frontier_coverage_5": 0.004628715617582202,
"rewards/frontier_ece_reward": 0.03157777301967144,
"signal/accuracy_reward/centered_abs_mean": 0.1884711354970932,
"signal/accuracy_reward/group_std_mean": 0.2461162716150284,
"signal/accuracy_reward/group_zero_std_frac": 0.31388889253139496,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0942355677485466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0942355677485466,
"signal/advantage_abs_mean": 0.12647135853767394,
"signal/advantage_pre_scale_abs_mean": 0.12647135853767394,
"signal/advantage_pre_scale_std": 0.2025492161512375,
"signal/advantage_std": 0.2025492161512375,
"signal/brier_reward/centered_abs_mean": 0.16874447762966155,
"signal/brier_reward/group_std_mean": 0.22043513357639313,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021093059703707694,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021093059703707694,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05148363336920738,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08058208972215652,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006435454171150923,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006435454171150923,
"signal/format_reward/centered_abs_mean": 0.03021918386220932,
"signal/format_reward/group_std_mean": 0.05589370355010033,
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01510959193110466,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01510959193110466,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003583089355379343,
"signal/frontier_aurc_reward/group_std_mean": 0.005511940456926823,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.413729715859517e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.413729715859517e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.09764359593391418,
"signal/frontier_coverage_1/group_std_mean": 0.1448903352022171,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_10/centered_abs_mean": 0.09764359593391418,
"signal/frontier_coverage_10/group_std_mean": 0.1448903352022171,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_15/centered_abs_mean": 0.09764359593391418,
"signal/frontier_coverage_15/group_std_mean": 0.1448903352022171,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_20/centered_abs_mean": 0.09764359593391418,
"signal/frontier_coverage_20/group_std_mean": 0.1448903352022171,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_25/centered_abs_mean": 0.09764359593391418,
"signal/frontier_coverage_25/group_std_mean": 0.1448903352022171,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_5/centered_abs_mean": 0.09764359593391418,
"signal/frontier_coverage_5/group_std_mean": 0.1448903352022171,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017478203400969506,
"signal/frontier_ece_reward/centered_abs_mean": 0.05283080860972404,
"signal/frontier_ece_reward/group_std_mean": 0.06684889793395996,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006603851076215505,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006603851076215505,
"step": 60
},
{
"calibration/aurc": 0.25178061436936255,
"calibration/batch_distribution_entropy": 0.782224239763399,
"calibration/buffer_distribution_entropy": 0.6979801492843933,
"calibration/confidence_entropy": 0.38540589096164324,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.025,
"calibration/coverage@15%": 0.326465196377979,
"calibration/coverage@20%": 0.6091465609610507,
"calibration/coverage@25%": 0.6931875417068663,
"calibration/coverage@30%": 0.7474254742547426,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.16355479654311317,
"calibration/mean_confidence": 0.7571751739178787,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017708333333333326,
"completions/max_length": 3540.0,
"completions/max_terminated_length": 3540.0,
"completions/mean_length": 614.4053955078125,
"completions/mean_terminated_length": 625.5040283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.1559980500243747,
"grad_norm": 0.0004897533799521625,
"learning_rate": 4.307228915662651e-06,
"loss": -0.0152,
"num_tokens": 125094818.0,
"reward": 1.0328210592269897,
"reward_std": 0.16226947605609893,
"rewards/accuracy_reward": 0.6543402791023254,
"rewards/brier_reward": 0.7571277022361755,
"rewards/confidence_uniqueness_reward": 0.9207069873809814,
"rewards/format_reward": 0.9821180462837219,
"rewards/frontier_aurc_reward": -0.0027203528210520745,
"rewards/frontier_coverage_1": 0.0034012388437986373,
"rewards/frontier_coverage_10": 0.0034012388437986373,
"rewards/frontier_coverage_15": 0.0034012388437986373,
"rewards/frontier_coverage_20": 0.0034012388437986373,
"rewards/frontier_coverage_25": 0.0034012388437986373,
"rewards/frontier_coverage_5": 0.0034012388437986373,
"rewards/frontier_ece_reward": 0.03636742420494556,
"signal/accuracy_reward/centered_abs_mean": 0.16812065839767457,
"signal/accuracy_reward/group_std_mean": 0.23046530783176422,
"signal/accuracy_reward/group_zero_std_frac": 0.31111111044883727,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08406032919883728,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08406032919883728,
"signal/advantage_abs_mean": 0.11638958901166915,
"signal/advantage_pre_scale_abs_mean": 0.11638958901166915,
"signal/advantage_pre_scale_std": 0.19259226322174072,
"signal/advantage_std": 0.19259226322174072,
"signal/brier_reward/centered_abs_mean": 0.1747972458600998,
"signal/brier_reward/group_std_mean": 0.2305249333381653,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021849655732512473,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021849655732512473,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.056257478147745135,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08205792903900147,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007032184768468142,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007032184768468142,
"signal/format_reward/centered_abs_mean": 0.02969835065305233,
"signal/format_reward/group_std_mean": 0.05096030831336975,
"signal/format_reward/group_zero_std_frac": 0.8083333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014849175326526165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014849175326526165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032653619535267355,
"signal/frontier_aurc_reward/group_std_mean": 0.004880654439330101,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.844997795065865e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.844997795065865e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1121538296341896,
"signal/frontier_coverage_1/group_std_mean": 0.16928686797618867,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_10/centered_abs_mean": 0.1121538296341896,
"signal/frontier_coverage_10/group_std_mean": 0.16928686797618867,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_15/centered_abs_mean": 0.1121538296341896,
"signal/frontier_coverage_15/group_std_mean": 0.16928686797618867,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_20/centered_abs_mean": 0.1121538296341896,
"signal/frontier_coverage_20/group_std_mean": 0.16928686797618867,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_25/centered_abs_mean": 0.1121538296341896,
"signal/frontier_coverage_25/group_std_mean": 0.16928686797618867,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_5/centered_abs_mean": 0.1121538296341896,
"signal/frontier_coverage_5/group_std_mean": 0.16928686797618867,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002007553423754871,
"signal/frontier_ece_reward/centered_abs_mean": 0.05328927487134934,
"signal/frontier_ece_reward/group_std_mean": 0.06582499742507934,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006661159358918667,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006661159358918667,
"step": 65
},
{
"calibration/aurc": 0.2951920429356995,
"calibration/batch_distribution_entropy": 0.6789171581871921,
"calibration/buffer_distribution_entropy": 0.7060502188884911,
"calibration/confidence_entropy": 0.3652801822043075,
"calibration/coverage@0%": 0.016976127320954906,
"calibration/coverage@1%": 0.016976127320954906,
"calibration/coverage@10%": 0.11777188328912466,
"calibration/coverage@15%": 0.13580901856763924,
"calibration/coverage@20%": 0.15119363395225466,
"calibration/coverage@25%": 0.2842619485873057,
"calibration/coverage@30%": 0.5154877819661559,
"calibration/coverage@5%": 0.070026525198939,
"calibration/ece": 0.19455041095714468,
"calibration/mean_confidence": 0.806789845714594,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020138888888888908,
"completions/max_length": 3470.6,
"completions/max_terminated_length": 3470.6,
"completions/mean_length": 599.9177124023438,
"completions/mean_terminated_length": 612.2438720703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 124.2,
"epoch": 0.16799790002624967,
"grad_norm": 0.00047923889360390604,
"learning_rate": 4.156626506024097e-06,
"loss": -0.0193,
"num_tokens": 135084014.0,
"reward": 1.017272448539734,
"reward_std": 0.16175627410411836,
"rewards/accuracy_reward": 0.6318576335906982,
"rewards/brier_reward": 0.7436864018440247,
"rewards/confidence_uniqueness_reward": 0.9008719086647033,
"rewards/format_reward": 0.9797742962837219,
"rewards/frontier_aurc_reward": -0.0031225522980093956,
"rewards/frontier_coverage_1": 0.014106421242468059,
"rewards/frontier_coverage_10": 0.014106421242468059,
"rewards/frontier_coverage_15": 0.014106421242468059,
"rewards/frontier_coverage_20": 0.014106421242468059,
"rewards/frontier_coverage_25": 0.014106421242468059,
"rewards/frontier_coverage_5": 0.014106421242468059,
"rewards/frontier_ece_reward": 0.03542088866233826,
"signal/accuracy_reward/centered_abs_mean": 0.1743109792470932,
"signal/accuracy_reward/group_std_mean": 0.22547804117202758,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0871554896235466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0871554896235466,
"signal/advantage_abs_mean": 0.12052754461765289,
"signal/advantage_pre_scale_abs_mean": 0.12052754461765289,
"signal/advantage_pre_scale_std": 0.20319488048553466,
"signal/advantage_std": 0.20319488048553466,
"signal/brier_reward/centered_abs_mean": 0.1648993283510208,
"signal/brier_reward/group_std_mean": 0.2110469877719879,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0206124160438776,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0206124160438776,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06894725412130356,
"signal/confidence_uniqueness_reward/group_std_mean": 0.097543103992939,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008618406765162945,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008618406765162945,
"signal/format_reward/centered_abs_mean": 0.03365342915058136,
"signal/format_reward/group_std_mean": 0.05752314925193787,
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01682671457529068,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01682671457529068,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033782635815441608,
"signal/frontier_aurc_reward/group_std_mean": 0.004819054994732142,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.047091592336074e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.047091592336074e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.07730035781860352,
"signal/frontier_coverage_1/group_std_mean": 0.11400771141052246,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_10/centered_abs_mean": 0.07730035781860352,
"signal/frontier_coverage_10/group_std_mean": 0.11400771141052246,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_15/centered_abs_mean": 0.07730035781860352,
"signal/frontier_coverage_15/group_std_mean": 0.11400771141052246,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_20/centered_abs_mean": 0.07730035781860352,
"signal/frontier_coverage_20/group_std_mean": 0.11400771141052246,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_25/centered_abs_mean": 0.07730035781860352,
"signal/frontier_coverage_25/group_std_mean": 0.11400771141052246,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_5/centered_abs_mean": 0.07730035781860352,
"signal/frontier_coverage_5/group_std_mean": 0.11400771141052246,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013836764032021166,
"signal/frontier_ece_reward/centered_abs_mean": 0.0494035005569458,
"signal/frontier_ece_reward/group_std_mean": 0.060732795298099516,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006175437569618225,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006175437569618225,
"step": 70
},
{
"calibration/aurc": 0.23588326946851917,
"calibration/batch_distribution_entropy": 0.7336437964325994,
"calibration/buffer_distribution_entropy": 0.7058757308269206,
"calibration/confidence_entropy": 0.4050379491719296,
"calibration/coverage@0%": 0.0026345104155062935,
"calibration/coverage@1%": 0.0026345104155062935,
"calibration/coverage@10%": 0.15062862394804186,
"calibration/coverage@15%": 0.24837791370120765,
"calibration/coverage@20%": 0.5237080040614946,
"calibration/coverage@25%": 0.6888860829670989,
"calibration/coverage@30%": 0.7517615176151762,
"calibration/coverage@5%": 0.0026345104155062935,
"calibration/ece": 0.14133860252060226,
"calibration/mean_confidence": 0.7841849798052773,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015538194444444441,
"completions/max_length": 2772.4,
"completions/max_terminated_length": 2772.4,
"completions/mean_length": 612.7648559570313,
"completions/mean_terminated_length": 622.4307373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.4,
"epoch": 0.17999775002812465,
"grad_norm": 0.0004477369075175375,
"learning_rate": 4.006024096385543e-06,
"loss": -0.0129,
"num_tokens": 145207961.0,
"reward": 1.0510570764541627,
"reward_std": 0.15537019968032836,
"rewards/accuracy_reward": 0.6852430582046509,
"rewards/brier_reward": 0.7796695828437805,
"rewards/confidence_uniqueness_reward": 0.9134960055351258,
"rewards/format_reward": 0.9844618082046509,
"rewards/frontier_aurc_reward": -0.002246162900701165,
"rewards/frontier_coverage_1": 0.0013872329844161868,
"rewards/frontier_coverage_10": 0.0013872329844161868,
"rewards/frontier_coverage_15": 0.0013872329844161868,
"rewards/frontier_coverage_20": 0.0013872329844161868,
"rewards/frontier_coverage_25": 0.0013872329844161868,
"rewards/frontier_coverage_5": 0.0013872329844161868,
"rewards/frontier_ece_reward": 0.03560146205127239,
"signal/accuracy_reward/centered_abs_mean": 0.17493489682674407,
"signal/accuracy_reward/group_std_mean": 0.2285703092813492,
"signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08746744841337203,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08746744841337203,
"signal/advantage_abs_mean": 0.11331436336040497,
"signal/advantage_pre_scale_abs_mean": 0.11331436336040497,
"signal/advantage_pre_scale_std": 0.19401153922080994,
"signal/advantage_std": 0.19401153922080994,
"signal/brier_reward/centered_abs_mean": 0.1502958595752716,
"signal/brier_reward/group_std_mean": 0.19655809700489044,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01878698244690895,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01878698244690895,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05595709830522537,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08529313653707504,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006994637288153171,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006994637288153171,
"signal/format_reward/centered_abs_mean": 0.02707790769636631,
"signal/format_reward/group_std_mean": 0.05181853100657463,
"signal/format_reward/group_zero_std_frac": 0.7861111402511597,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013538953848183155,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013538953848183155,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002420555287972093,
"signal/frontier_aurc_reward/group_std_mean": 0.003638601349666715,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.332793687353842e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.332793687353842e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.08835429251194,
"signal/frontier_coverage_1/group_std_mean": 0.12635914981365204,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_10/centered_abs_mean": 0.08835429251194,
"signal/frontier_coverage_10/group_std_mean": 0.12635914981365204,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_15/centered_abs_mean": 0.08835429251194,
"signal/frontier_coverage_15/group_std_mean": 0.12635914981365204,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_20/centered_abs_mean": 0.08835429251194,
"signal/frontier_coverage_20/group_std_mean": 0.12635914981365204,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_25/centered_abs_mean": 0.08835429251194,
"signal/frontier_coverage_25/group_std_mean": 0.12635914981365204,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_5/centered_abs_mean": 0.08835429251194,
"signal/frontier_coverage_5/group_std_mean": 0.12635914981365204,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015815417747944594,
"signal/frontier_ece_reward/centered_abs_mean": 0.042579871416091916,
"signal/frontier_ece_reward/group_std_mean": 0.05289793238043785,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0053224839270114895,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0053224839270114895,
"step": 75
},
{
"calibration/aurc": 0.2283627656944609,
"calibration/batch_distribution_entropy": 0.8158849488071735,
"calibration/buffer_distribution_entropy": 0.7136887505869486,
"calibration/confidence_entropy": 0.4471504385566205,
"calibration/coverage@0%": 0.020931500872600352,
"calibration/coverage@1%": 0.020931500872600352,
"calibration/coverage@10%": 0.27006980802792324,
"calibration/coverage@15%": 0.3453125,
"calibration/coverage@20%": 0.3697916666666667,
"calibration/coverage@25%": 0.6741954607046071,
"calibration/coverage@30%": 0.7745257452574525,
"calibration/coverage@5%": 0.09882744328097731,
"calibration/ece": 0.14655096164643444,
"calibration/mean_confidence": 0.7371357132363405,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017100694444444443,
"completions/max_length": 3731.6,
"completions/max_terminated_length": 3731.6,
"completions/mean_length": 658.69453125,
"completions/mean_terminated_length": 670.2767578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.19199760002999963,
"grad_norm": 0.0004507862322498113,
"learning_rate": 3.855421686746989e-06,
"loss": -0.0163,
"num_tokens": 155849402.0,
"reward": 1.0312724709510803,
"reward_std": 0.1551019161939621,
"rewards/accuracy_reward": 0.6509548664093018,
"rewards/brier_reward": 0.7650970101356507,
"rewards/confidence_uniqueness_reward": 0.9270597219467163,
"rewards/format_reward": 0.982812511920929,
"rewards/frontier_aurc_reward": -0.0021514812484383582,
"rewards/frontier_coverage_1": 0.0006764297373592854,
"rewards/frontier_coverage_10": 0.0006764297373592854,
"rewards/frontier_coverage_15": 0.0006764297373592854,
"rewards/frontier_coverage_20": 0.0006764297373592854,
"rewards/frontier_coverage_25": 0.0006764297373592854,
"rewards/frontier_coverage_5": 0.0006764297373592854,
"rewards/frontier_ece_reward": 0.022680159099400042,
"signal/accuracy_reward/centered_abs_mean": 0.18104926347732545,
"signal/accuracy_reward/group_std_mean": 0.23661141991615295,
"signal/accuracy_reward/group_zero_std_frac": 0.33611112236976626,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09052463173866272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09052463173866272,
"signal/advantage_abs_mean": 0.11439994722604752,
"signal/advantage_pre_scale_abs_mean": 0.11439994722604752,
"signal/advantage_pre_scale_std": 0.1880962520837784,
"signal/advantage_std": 0.1880962520837784,
"signal/brier_reward/centered_abs_mean": 0.15392581820487977,
"signal/brier_reward/group_std_mean": 0.19755606949329377,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01924072727560997,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01924072727560997,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045452053844928744,
"signal/confidence_uniqueness_reward/group_std_mean": 0.070206418633461,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005681506730616093,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005681506730616093,
"signal/format_reward/centered_abs_mean": 0.027105035632848738,
"signal/format_reward/group_std_mean": 0.04782758429646492,
"signal/format_reward/group_zero_std_frac": 0.8166666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013552517816424369,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013552517816424369,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020432798657566307,
"signal/frontier_aurc_reward/group_std_mean": 0.003136290283873677,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.657470879261382e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.657470879261382e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12025301158428192,
"signal/frontier_coverage_1/group_std_mean": 0.16328471302986144,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_10/centered_abs_mean": 0.12025301158428192,
"signal/frontier_coverage_10/group_std_mean": 0.16328471302986144,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_15/centered_abs_mean": 0.12025301158428192,
"signal/frontier_coverage_15/group_std_mean": 0.16328471302986144,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_20/centered_abs_mean": 0.12025301158428192,
"signal/frontier_coverage_20/group_std_mean": 0.16328471302986144,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_25/centered_abs_mean": 0.12025301158428192,
"signal/frontier_coverage_25/group_std_mean": 0.16328471302986144,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_5/centered_abs_mean": 0.12025301158428192,
"signal/frontier_coverage_5/group_std_mean": 0.16328471302986144,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021525288466364145,
"signal/frontier_ece_reward/centered_abs_mean": 0.034540054574608806,
"signal/frontier_ece_reward/group_std_mean": 0.044214902073144914,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004317506821826101,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004317506821826101,
"step": 80
},
{
"calibration/aurc": 0.19863731386705097,
"calibration/batch_distribution_entropy": 0.8784851844174906,
"calibration/buffer_distribution_entropy": 0.7286079857168308,
"calibration/confidence_entropy": 0.47194029223098666,
"calibration/coverage@0%": 0.006957876563311484,
"calibration/coverage@1%": 0.006957876563311484,
"calibration/coverage@10%": 0.21343171212142478,
"calibration/coverage@15%": 0.31718648568982216,
"calibration/coverage@20%": 0.5108775185596135,
"calibration/coverage@25%": 0.8152747734527581,
"calibration/coverage@30%": 0.9188776441194223,
"calibration/coverage@5%": 0.06903902044177439,
"calibration/ece": 0.11392621986175389,
"calibration/mean_confidence": 0.685149595819355,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017447916666666674,
"completions/max_length": 3531.0,
"completions/max_terminated_length": 3531.0,
"completions/mean_length": 672.5692749023438,
"completions/mean_terminated_length": 684.5512817382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.8,
"epoch": 0.2039974500318746,
"grad_norm": 0.00040943012572824955,
"learning_rate": 3.7048192771084342e-06,
"loss": -0.0158,
"num_tokens": 166684600.0,
"reward": 1.0490335464477538,
"reward_std": 0.15231254696846008,
"rewards/accuracy_reward": 0.6825520753860473,
"rewards/brier_reward": 0.7915989398956299,
"rewards/confidence_uniqueness_reward": 0.9125476956367493,
"rewards/format_reward": 0.9823784708976746,
"rewards/frontier_aurc_reward": -0.0017510119127109648,
"rewards/frontier_coverage_1": 0.0037823686841875316,
"rewards/frontier_coverage_10": 0.0037823686841875316,
"rewards/frontier_coverage_15": 0.0037823686841875316,
"rewards/frontier_coverage_20": 0.0037823686841875316,
"rewards/frontier_coverage_25": 0.0037823686841875316,
"rewards/frontier_coverage_5": 0.0037823686841875316,
"rewards/frontier_ece_reward": 0.025400371849536897,
"signal/accuracy_reward/centered_abs_mean": 0.1764702707529068,
"signal/accuracy_reward/group_std_mean": 0.22980018258094786,
"signal/accuracy_reward/group_zero_std_frac": 0.35555556416511536,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0882351353764534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0882351353764534,
"signal/advantage_abs_mean": 0.10985312461853028,
"signal/advantage_pre_scale_abs_mean": 0.10985312461853028,
"signal/advantage_pre_scale_std": 0.19002984464168549,
"signal/advantage_std": 0.19002984464168549,
"signal/brier_reward/centered_abs_mean": 0.14407358169555665,
"signal/brier_reward/group_std_mean": 0.18776251673698424,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01800919771194458,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01800919771194458,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.051011984795331956,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08202408254146576,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063764980994164945,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063764980994164945,
"signal/format_reward/centered_abs_mean": 0.03092990405857563,
"signal/format_reward/group_std_mean": 0.05930749401450157,
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015464952029287816,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.015464952029287816,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017428799066692591,
"signal/frontier_aurc_reward/group_std_mean": 0.002763870591297746,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.119755092484411e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.119755092484411e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11682608127593994,
"signal/frontier_coverage_1/group_std_mean": 0.1566822350025177,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_10/centered_abs_mean": 0.11682608127593994,
"signal/frontier_coverage_10/group_std_mean": 0.1566822350025177,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_15/centered_abs_mean": 0.11682608127593994,
"signal/frontier_coverage_15/group_std_mean": 0.1566822350025177,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_20/centered_abs_mean": 0.11682608127593994,
"signal/frontier_coverage_20/group_std_mean": 0.1566822350025177,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_25/centered_abs_mean": 0.11682608127593994,
"signal/frontier_coverage_25/group_std_mean": 0.1566822350025177,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_5/centered_abs_mean": 0.11682608127593994,
"signal/frontier_coverage_5/group_std_mean": 0.1566822350025177,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020911867963150144,
"signal/frontier_ece_reward/centered_abs_mean": 0.03192468658089638,
"signal/frontier_ece_reward/group_std_mean": 0.04087934568524361,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0039905858226120475,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0039905858226120475,
"step": 85
},
{
"calibration/aurc": 0.1621043276555874,
"calibration/batch_distribution_entropy": 0.7713018291348717,
"calibration/buffer_distribution_entropy": 0.7391826748921617,
"calibration/confidence_entropy": 0.3906538953529201,
"calibration/coverage@0%": 0.01748775430290532,
"calibration/coverage@1%": 0.01748775430290532,
"calibration/coverage@10%": 0.29865454137224134,
"calibration/coverage@15%": 0.4961013783380685,
"calibration/coverage@20%": 0.7170695539701011,
"calibration/coverage@25%": 0.8760416319073021,
"calibration/coverage@30%": 0.9350359579568505,
"calibration/coverage@5%": 0.12930449114246848,
"calibration/ece": 0.10879461725676424,
"calibration/mean_confidence": 0.7703561898492359,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018055555555555537,
"completions/max_length": 3415.8,
"completions/max_terminated_length": 3415.8,
"completions/mean_length": 657.1771728515625,
"completions/mean_terminated_length": 669.2972290039063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.2159973000337496,
"grad_norm": 0.00045981750008650124,
"learning_rate": 3.5542168674698798e-06,
"loss": -0.0165,
"num_tokens": 177323953.0,
"reward": 1.048110580444336,
"reward_std": 0.1468990057706833,
"rewards/accuracy_reward": 0.6861111164093018,
"rewards/brier_reward": 0.786482310295105,
"rewards/confidence_uniqueness_reward": 0.8900201439857482,
"rewards/format_reward": 0.9819444417953491,
"rewards/frontier_aurc_reward": -0.002023177081719041,
"rewards/frontier_coverage_1": 0.006752363312989474,
"rewards/frontier_coverage_10": 0.006752363312989474,
"rewards/frontier_coverage_15": 0.006752363312989474,
"rewards/frontier_coverage_20": 0.006752363312989474,
"rewards/frontier_coverage_25": 0.006752363312989474,
"rewards/frontier_coverage_5": 0.006752363312989474,
"rewards/frontier_ece_reward": 0.03064808137714863,
"signal/accuracy_reward/centered_abs_mean": 0.16185981035232544,
"signal/accuracy_reward/group_std_mean": 0.21465785503387452,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08092990517616272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08092990517616272,
"signal/advantage_abs_mean": 0.10632715672254563,
"signal/advantage_pre_scale_abs_mean": 0.10632715672254563,
"signal/advantage_pre_scale_std": 0.18754963874816893,
"signal/advantage_std": 0.18754963874816893,
"signal/brier_reward/centered_abs_mean": 0.14517129957675934,
"signal/brier_reward/group_std_mean": 0.1876837819814682,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018146412447094917,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018146412447094917,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0636248804628849,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0918489396572113,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007953110057860613,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007953110057860613,
"signal/format_reward/centered_abs_mean": 0.03050130233168602,
"signal/format_reward/group_std_mean": 0.054274033010005954,
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01525065116584301,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01525065116584301,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024305079132318497,
"signal/frontier_aurc_reward/group_std_mean": 0.003738354705274105,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3506090150913224e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3506090150913224e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.09725135415792466,
"signal/frontier_coverage_1/group_std_mean": 0.1383568376302719,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_10/centered_abs_mean": 0.09725135415792466,
"signal/frontier_coverage_10/group_std_mean": 0.1383568376302719,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_15/centered_abs_mean": 0.09725135415792466,
"signal/frontier_coverage_15/group_std_mean": 0.1383568376302719,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_20/centered_abs_mean": 0.09725135415792466,
"signal/frontier_coverage_20/group_std_mean": 0.1383568376302719,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_25/centered_abs_mean": 0.09725135415792466,
"signal/frontier_coverage_25/group_std_mean": 0.1383568376302719,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_5/centered_abs_mean": 0.09725135415792466,
"signal/frontier_coverage_5/group_std_mean": 0.1383568376302719,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017407992389053106,
"signal/frontier_ece_reward/centered_abs_mean": 0.03363135680556297,
"signal/frontier_ece_reward/group_std_mean": 0.04208812639117241,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0042039196006953715,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0042039196006953715,
"step": 90
},
{
"calibration/aurc": 0.22302830877145974,
"calibration/batch_distribution_entropy": 0.7654984420303647,
"calibration/buffer_distribution_entropy": 0.7426643403332136,
"calibration/confidence_entropy": 0.37485479191168325,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.32827225130890053,
"calibration/coverage@15%": 0.44633415268512344,
"calibration/coverage@20%": 0.5211039690113962,
"calibration/coverage@25%": 0.5988238926626621,
"calibration/coverage@30%": 0.70975130522652,
"calibration/coverage@5%": 0.10628272251308901,
"calibration/ece": 0.17951479772284673,
"calibration/mean_confidence": 0.7752378849995252,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010850694444444465,
"completions/max_length": 3244.6,
"completions/max_terminated_length": 3244.6,
"completions/mean_length": 665.167529296875,
"completions/mean_terminated_length": 672.4716430664063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.6,
"epoch": 0.22799715003562457,
"grad_norm": 0.00041399727342650294,
"learning_rate": 3.4036144578313257e-06,
"loss": -0.0103,
"num_tokens": 188078363.0,
"reward": 1.0430236101150512,
"reward_std": 0.1384707883000374,
"rewards/accuracy_reward": 0.6696180582046509,
"rewards/brier_reward": 0.7802430033683777,
"rewards/confidence_uniqueness_reward": 0.885105288028717,
"rewards/format_reward": 0.989062488079071,
"rewards/frontier_aurc_reward": -0.002526196092367172,
"rewards/frontier_coverage_1": 0.017078271601349115,
"rewards/frontier_coverage_10": 0.017078271601349115,
"rewards/frontier_coverage_15": 0.017078271601349115,
"rewards/frontier_coverage_20": 0.017078271601349115,
"rewards/frontier_coverage_25": 0.017078271601349115,
"rewards/frontier_coverage_5": 0.017078271601349115,
"rewards/frontier_ece_reward": 0.029806675761938094,
"signal/accuracy_reward/centered_abs_mean": 0.15270182192325593,
"signal/accuracy_reward/group_std_mean": 0.2024629831314087,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07635091096162797,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07635091096162797,
"signal/advantage_abs_mean": 0.09986221194267272,
"signal/advantage_pre_scale_abs_mean": 0.09986221194267272,
"signal/advantage_pre_scale_std": 0.17722425758838653,
"signal/advantage_std": 0.17722425758838653,
"signal/brier_reward/centered_abs_mean": 0.14648381173610686,
"signal/brier_reward/group_std_mean": 0.19169094264507294,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018310476467013358,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018310476467013358,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06063394397497177,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08702098578214645,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007579242996871472,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007579242996871472,
"signal/format_reward/centered_abs_mean": 0.019715712033212186,
"signal/format_reward/group_std_mean": 0.039822696894407275,
"signal/format_reward/group_zero_std_frac": 0.8305555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009857856016606093,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009857856016606093,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003348661307245493,
"signal/frontier_aurc_reward/group_std_mean": 0.005126806069165468,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.994103412376717e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.994103412376717e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.08807848840951919,
"signal/frontier_coverage_1/group_std_mean": 0.126033778488636,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_10/centered_abs_mean": 0.08807848840951919,
"signal/frontier_coverage_10/group_std_mean": 0.126033778488636,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_15/centered_abs_mean": 0.08807848840951919,
"signal/frontier_coverage_15/group_std_mean": 0.126033778488636,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_20/centered_abs_mean": 0.08807848840951919,
"signal/frontier_coverage_20/group_std_mean": 0.126033778488636,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_25/centered_abs_mean": 0.08807848840951919,
"signal/frontier_coverage_25/group_std_mean": 0.126033778488636,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_5/centered_abs_mean": 0.08807848840951919,
"signal/frontier_coverage_5/group_std_mean": 0.126033778488636,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00157660492695868,
"signal/frontier_ece_reward/centered_abs_mean": 0.03316693603992462,
"signal/frontier_ece_reward/group_std_mean": 0.04110720306634903,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041458670049905775,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041458670049905775,
"step": 95
},
{
"calibration/aurc": 0.19005778525037625,
"calibration/batch_distribution_entropy": 0.7905120036990934,
"calibration/buffer_distribution_entropy": 0.7454486867596737,
"calibration/confidence_entropy": 0.41916534905811276,
"calibration/coverage@0%": 0.01983628239499553,
"calibration/coverage@1%": 0.01983628239499553,
"calibration/coverage@10%": 0.12477490240346174,
"calibration/coverage@15%": 0.3196656977564555,
"calibration/coverage@20%": 0.7049851243582956,
"calibration/coverage@25%": 0.9022022628372499,
"calibration/coverage@30%": 0.9375679721496954,
"calibration/coverage@5%": 0.049326899016979446,
"calibration/ece": 0.09743477958458721,
"calibration/mean_confidence": 0.7563954787317189,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015364583333333303,
"completions/max_length": 3783.6,
"completions/max_terminated_length": 3783.6,
"completions/mean_length": 696.7359375,
"completions/mean_terminated_length": 707.5933471679688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.8,
"epoch": 0.23999700003749952,
"grad_norm": 0.00038863401277922094,
"learning_rate": 3.2530120481927713e-06,
"loss": -0.0138,
"num_tokens": 199203833.0,
"reward": 1.0556352138519287,
"reward_std": 0.1429404079914093,
"rewards/accuracy_reward": 0.6901041626930237,
"rewards/brier_reward": 0.8032171607017518,
"rewards/confidence_uniqueness_reward": 0.8976126194000245,
"rewards/format_reward": 0.9845486164093018,
"rewards/frontier_aurc_reward": -0.0018692356767132877,
"rewards/frontier_coverage_1": 0.02305122137913713,
"rewards/frontier_coverage_10": 0.02305122137913713,
"rewards/frontier_coverage_15": 0.02305122137913713,
"rewards/frontier_coverage_20": 0.02305122137913713,
"rewards/frontier_coverage_25": 0.02305122137913713,
"rewards/frontier_coverage_5": 0.02305122137913713,
"rewards/frontier_ece_reward": 0.02610306181013584,
"signal/accuracy_reward/centered_abs_mean": 0.16201171576976775,
"signal/accuracy_reward/group_std_mean": 0.21425627470016478,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08100585788488388,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08100585788488388,
"signal/advantage_abs_mean": 0.10346845239400863,
"signal/advantage_pre_scale_abs_mean": 0.10346845239400863,
"signal/advantage_pre_scale_std": 0.18151322603225709,
"signal/advantage_std": 0.18151322603225709,
"signal/brier_reward/centered_abs_mean": 0.13857089430093766,
"signal/brier_reward/group_std_mean": 0.18216053247451783,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017321361787617208,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017321361787617208,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.050343307107686995,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07695924490690231,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006292913388460874,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006292913388460874,
"signal/format_reward/centered_abs_mean": 0.02498914934694767,
"signal/format_reward/group_std_mean": 0.04789231047034263,
"signal/format_reward/group_zero_std_frac": 0.8,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012494574673473835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012494574673473835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00248065204359591,
"signal/frontier_aurc_reward/group_std_mean": 0.0040627093985676765,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.440367265488021e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.440367265488021e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.09468016475439071,
"signal/frontier_coverage_1/group_std_mean": 0.13442795127630233,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_10/centered_abs_mean": 0.09468016475439071,
"signal/frontier_coverage_10/group_std_mean": 0.13442795127630233,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_15/centered_abs_mean": 0.09468016475439071,
"signal/frontier_coverage_15/group_std_mean": 0.13442795127630233,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_20/centered_abs_mean": 0.09468016475439071,
"signal/frontier_coverage_20/group_std_mean": 0.13442795127630233,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_25/centered_abs_mean": 0.09468016475439071,
"signal/frontier_coverage_25/group_std_mean": 0.13442795127630233,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_5/centered_abs_mean": 0.09468016475439071,
"signal/frontier_coverage_5/group_std_mean": 0.13442795127630233,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016947749769315123,
"signal/frontier_ece_reward/centered_abs_mean": 0.028183171153068544,
"signal/frontier_ece_reward/group_std_mean": 0.035510845482349396,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003522896394133568,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003522896394133568,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_calibration/aurc": 0.15891660441180894,
"eval_calibration/batch_distribution_entropy": 0.6959708379422341,
"eval_calibration/buffer_distribution_entropy": 0.7499945272337074,
"eval_calibration/confidence_entropy": 0.414659227708472,
"eval_calibration/coverage@0%": 0.19452284946236562,
"eval_calibration/coverage@1%": 0.19452284946236562,
"eval_calibration/coverage@10%": 0.3979166666666667,
"eval_calibration/coverage@15%": 0.6486335125448028,
"eval_calibration/coverage@20%": 0.7757056451612904,
"eval_calibration/coverage@25%": 0.8178763440860215,
"eval_calibration/coverage@30%": 0.9625336021505376,
"eval_calibration/coverage@5%": 0.24828629032258065,
"eval_calibration/ece": 0.1466347668793299,
"eval_calibration/mean_confidence": 0.7744951179912419,
"eval_completions/clipped_ratio": 0.012152777777777771,
"eval_completions/max_length": 2663.8333333333335,
"eval_completions/max_terminated_length": 2663.8333333333335,
"eval_completions/mean_length": 688.960947672526,
"eval_completions/mean_terminated_length": 697.5240783691406,
"eval_completions/min_length": 46.333333333333336,
"eval_completions/min_terminated_length": 219.66666666666666,
"eval_loss": 0.0,
"eval_num_tokens": 199203833.0,
"eval_reward": 1.0394453605016072,
"eval_reward_std": 0.2735634073615074,
"eval_rewards/accuracy_reward": 0.6710069477558136,
"eval_rewards/brier_reward": 0.7972110112508138,
"eval_rewards/confidence_uniqueness_reward": 0.8433377345403036,
"eval_rewards/format_reward": 0.9869791666666666,
"eval_rewards/frontier_aurc_reward": -0.002003069695395728,
"eval_rewards/frontier_coverage_1": 0.026348761282861233,
"eval_rewards/frontier_coverage_10": 0.026348761282861233,
"eval_rewards/frontier_coverage_15": 0.026348761282861233,
"eval_rewards/frontier_coverage_20": 0.026348761282861233,
"eval_rewards/frontier_coverage_25": 0.026348761282861233,
"eval_rewards/frontier_coverage_5": 0.026348761282861233,
"eval_rewards/frontier_ece_reward": 0.0207175404454271,
"eval_runtime": 205.6415,
"eval_samples_per_second": 4.863,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4276801198720932,
"eval_signal/accuracy_reward/group_std_mean": 0.4686971952517827,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2138400599360466,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2138400599360466,
"eval_signal/advantage_abs_mean": 0.237903726597627,
"eval_signal/advantage_pre_scale_abs_mean": 0.237903726597627,
"eval_signal/advantage_pre_scale_std": 0.2721952473123868,
"eval_signal/advantage_std": 0.2721952473123868,
"eval_signal/brier_reward/centered_abs_mean": 0.22816414137681326,
"eval_signal/brier_reward/group_std_mean": 0.28985429803530377,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028520517672101658,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.028520517672101658,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07482141132156055,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.11087949698170026,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009352676415195068,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009352676415195068,
"eval_signal/format_reward/centered_abs_mean": 0.024793836598594982,
"eval_signal/format_reward/group_std_mean": 0.06416239465276401,
"eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012396918299297491,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012396918299297491,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030194248538464308,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005542080150917172,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4047704907134175e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4047704907134175e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1204829066991806,
"eval_signal/frontier_coverage_1/group_std_mean": 0.20749556769927344,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.1204829066991806,
"eval_signal/frontier_coverage_10/group_std_mean": 0.20749556769927344,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.1204829066991806,
"eval_signal/frontier_coverage_15/group_std_mean": 0.20749556769927344,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1204829066991806,
"eval_signal/frontier_coverage_20/group_std_mean": 0.20749556769927344,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1204829066991806,
"eval_signal/frontier_coverage_25/group_std_mean": 0.20749556769927344,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1204829066991806,
"eval_signal/frontier_coverage_5/group_std_mean": 0.20749556769927344,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002156644050652782,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03396274273594221,
"eval_signal/frontier_ece_reward/group_std_mean": 0.04516912375887235,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004245342841992776,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004245342841992776,
"eval_steps_per_second": 0.029,
"step": 100
},
{
"calibration/aurc": 0.3174403591072565,
"calibration/batch_distribution_entropy": 0.7832574028492738,
"calibration/buffer_distribution_entropy": 0.7522402415093571,
"calibration/confidence_entropy": 0.4539565113764869,
"calibration/coverage@0%": 0.015250790160780538,
"calibration/coverage@1%": 0.015250790160780538,
"calibration/coverage@10%": 0.14032004223834288,
"calibration/coverage@15%": 0.15768846329097447,
"calibration/coverage@20%": 0.25616122896219523,
"calibration/coverage@25%": 0.32953986243608197,
"calibration/coverage@30%": 0.3985706226539276,
"calibration/coverage@5%": 0.08104026384499106,
"calibration/ece": 0.17285030321480427,
"calibration/mean_confidence": 0.7526070795683661,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015104166666666651,
"completions/max_length": 3606.8,
"completions/max_terminated_length": 3606.8,
"completions/mean_length": 710.5170166015625,
"completions/mean_terminated_length": 721.425048828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 164.6,
"epoch": 0.2519968500393745,
"grad_norm": 0.0004076336626894772,
"learning_rate": 3.1024096385542172e-06,
"loss": -0.0122,
"num_tokens": 210465853.0,
"reward": 1.0506530284881592,
"reward_std": 0.13989392966032027,
"rewards/accuracy_reward": 0.68125,
"rewards/brier_reward": 0.8004813671112061,
"rewards/confidence_uniqueness_reward": 0.9040295124053955,
"rewards/format_reward": 0.9848958373069763,
"rewards/frontier_aurc_reward": -0.0015720528550446033,
"rewards/frontier_coverage_1": 0.018485220894217492,
"rewards/frontier_coverage_10": 0.018485220894217492,
"rewards/frontier_coverage_15": 0.018485220894217492,
"rewards/frontier_coverage_20": 0.018485220894217492,
"rewards/frontier_coverage_25": 0.018485220894217492,
"rewards/frontier_coverage_5": 0.018485220894217492,
"rewards/frontier_ece_reward": 0.0204722385853529,
"signal/accuracy_reward/centered_abs_mean": 0.15923394113779069,
"signal/accuracy_reward/group_std_mean": 0.21463679075241088,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07961697056889534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07961697056889534,
"signal/advantage_abs_mean": 0.09950301349163056,
"signal/advantage_pre_scale_abs_mean": 0.09950301349163056,
"signal/advantage_pre_scale_std": 0.17661311626434326,
"signal/advantage_std": 0.17661311626434326,
"signal/brier_reward/centered_abs_mean": 0.13340435177087784,
"signal/brier_reward/group_std_mean": 0.17492244243621827,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01667554397135973,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01667554397135973,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047524832934141156,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07183988243341446,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0059406041167676445,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0059406041167676445,
"signal/format_reward/centered_abs_mean": 0.024891493655741215,
"signal/format_reward/group_std_mean": 0.04560527727007866,
"signal/format_reward/group_zero_std_frac": 0.8166666865348816,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012445746827870608,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012445746827870608,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016542579047381877,
"signal/frontier_aurc_reward/group_std_mean": 0.002758215693756938,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9611215722979978e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9611215722979978e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.09999113231897354,
"signal/frontier_coverage_1/group_std_mean": 0.13899571299552918,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_10/centered_abs_mean": 0.09999113231897354,
"signal/frontier_coverage_10/group_std_mean": 0.13899571299552918,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_15/centered_abs_mean": 0.09999113231897354,
"signal/frontier_coverage_15/group_std_mean": 0.13899571299552918,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_20/centered_abs_mean": 0.09999113231897354,
"signal/frontier_coverage_20/group_std_mean": 0.13899571299552918,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_25/centered_abs_mean": 0.09999113231897354,
"signal/frontier_coverage_25/group_std_mean": 0.13899571299552918,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_5/centered_abs_mean": 0.09999113231897354,
"signal/frontier_coverage_5/group_std_mean": 0.13899571299552918,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017898412188515067,
"signal/frontier_ece_reward/centered_abs_mean": 0.02349744737148285,
"signal/frontier_ece_reward/group_std_mean": 0.030407802015542985,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002937180921435356,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002937180921435356,
"step": 105
},
{
"calibration/aurc": 0.1980429971425905,
"calibration/batch_distribution_entropy": 0.8188233169740314,
"calibration/buffer_distribution_entropy": 0.7578019516298132,
"calibration/confidence_entropy": 0.4403369866103337,
"calibration/coverage@0%": 0.04398981835652027,
"calibration/coverage@1%": 0.04398981835652027,
"calibration/coverage@10%": 0.2766707394091519,
"calibration/coverage@15%": 0.3595005740521652,
"calibration/coverage@20%": 0.4683630213987621,
"calibration/coverage@25%": 0.6562042932608049,
"calibration/coverage@30%": 0.8698795999435456,
"calibration/coverage@5%": 0.15361152888283608,
"calibration/ece": 0.11835691165409723,
"calibration/mean_confidence": 0.7310259669183279,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013541666666666652,
"completions/max_length": 3426.8,
"completions/max_terminated_length": 3426.8,
"completions/mean_length": 731.7519897460937,
"completions/mean_terminated_length": 741.8162719726563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.4,
"epoch": 0.2639967000412495,
"grad_norm": 0.0004139347583986819,
"learning_rate": 2.9518072289156627e-06,
"loss": -0.0117,
"num_tokens": 222004084.0,
"reward": 1.0684332370758056,
"reward_std": 0.1274869754910469,
"rewards/accuracy_reward": 0.7131944537162781,
"rewards/brier_reward": 0.813305401802063,
"rewards/confidence_uniqueness_reward": 0.9080566763877869,
"rewards/format_reward": 0.9864583373069763,
"rewards/frontier_aurc_reward": -0.001346051273867488,
"rewards/frontier_coverage_1": 0.008798408973962068,
"rewards/frontier_coverage_10": 0.008798408973962068,
"rewards/frontier_coverage_15": 0.008798408973962068,
"rewards/frontier_coverage_20": 0.008798408973962068,
"rewards/frontier_coverage_25": 0.008798408973962068,
"rewards/frontier_coverage_5": 0.008798408973962068,
"rewards/frontier_ece_reward": 0.02012586295604706,
"signal/accuracy_reward/centered_abs_mean": 0.14842664897441865,
"signal/accuracy_reward/group_std_mean": 0.19679024815559387,
"signal/accuracy_reward/group_zero_std_frac": 0.43888888955116273,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07421332448720933,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07421332448720933,
"signal/advantage_abs_mean": 0.09238868355751037,
"signal/advantage_pre_scale_abs_mean": 0.09238868355751037,
"signal/advantage_pre_scale_std": 0.1690053313970566,
"signal/advantage_std": 0.1690053313970566,
"signal/brier_reward/centered_abs_mean": 0.12360798418521882,
"signal/brier_reward/group_std_mean": 0.16248373985290526,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015450998023152352,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015450998023152352,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04519175067543983,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06818027943372726,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005648968834429979,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005648968834429979,
"signal/format_reward/centered_abs_mean": 0.022352430410683154,
"signal/format_reward/group_std_mean": 0.04120796211063862,
"signal/format_reward/group_zero_std_frac": 0.8333333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011176215205341577,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.011176215205341577,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013575590681284666,
"signal/frontier_aurc_reward/group_std_mean": 0.0022294150665402414,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4300306176883168e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4300306176883168e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.10466690212488175,
"signal/frontier_coverage_1/group_std_mean": 0.1473758965730667,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_10/centered_abs_mean": 0.10466690212488175,
"signal/frontier_coverage_10/group_std_mean": 0.1473758965730667,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_15/centered_abs_mean": 0.10466690212488175,
"signal/frontier_coverage_15/group_std_mean": 0.1473758965730667,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_20/centered_abs_mean": 0.10466690212488175,
"signal/frontier_coverage_20/group_std_mean": 0.1473758965730667,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_25/centered_abs_mean": 0.10466690212488175,
"signal/frontier_coverage_25/group_std_mean": 0.1473758965730667,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_5/centered_abs_mean": 0.10466690212488175,
"signal/frontier_coverage_5/group_std_mean": 0.1473758965730667,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018735374789685011,
"signal/frontier_ece_reward/centered_abs_mean": 0.022222984954714774,
"signal/frontier_ece_reward/group_std_mean": 0.028566232323646544,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027778731193393467,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027778731193393467,
"step": 110
},
{
"calibration/aurc": 0.2674647745371999,
"calibration/batch_distribution_entropy": 0.8655340385503951,
"calibration/buffer_distribution_entropy": 0.7650680762501356,
"calibration/confidence_entropy": 0.46264810399743606,
"calibration/coverage@0%": 0.0356302909735624,
"calibration/coverage@1%": 0.0356302909735624,
"calibration/coverage@10%": 0.11016870839079522,
"calibration/coverage@15%": 0.22985091263385246,
"calibration/coverage@20%": 0.4752261244438419,
"calibration/coverage@25%": 0.5995528351424358,
"calibration/coverage@30%": 0.66065353496751,
"calibration/coverage@5%": 0.059629899612001526,
"calibration/ece": 0.16247371201899063,
"calibration/mean_confidence": 0.6835954949209723,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01284722222222221,
"completions/max_length": 3594.4,
"completions/max_terminated_length": 3594.4,
"completions/mean_length": 743.1428955078125,
"completions/mean_terminated_length": 752.9280395507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.4,
"epoch": 0.27599655004312446,
"grad_norm": 0.0003546822990756482,
"learning_rate": 2.8012048192771087e-06,
"loss": -0.0119,
"num_tokens": 233644290.0,
"reward": 1.045598602294922,
"reward_std": 0.1324237823486328,
"rewards/accuracy_reward": 0.665538203716278,
"rewards/brier_reward": 0.7980833888053894,
"rewards/confidence_uniqueness_reward": 0.9122416257858277,
"rewards/format_reward": 0.9870659828186035,
"rewards/frontier_aurc_reward": -0.0016012408072128893,
"rewards/frontier_coverage_1": 0.03013449099380523,
"rewards/frontier_coverage_10": 0.03013449099380523,
"rewards/frontier_coverage_15": 0.03013449099380523,
"rewards/frontier_coverage_20": 0.03013449099380523,
"rewards/frontier_coverage_25": 0.03013449099380523,
"rewards/frontier_coverage_5": 0.03013449099380523,
"rewards/frontier_ece_reward": 0.018384577706456184,
"signal/accuracy_reward/centered_abs_mean": 0.16057400405406952,
"signal/accuracy_reward/group_std_mean": 0.20613610446453096,
"signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08028700202703476,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08028700202703476,
"signal/advantage_abs_mean": 0.09785276204347611,
"signal/advantage_pre_scale_abs_mean": 0.09785276204347611,
"signal/advantage_pre_scale_std": 0.17164961993694305,
"signal/advantage_std": 0.17164961993694305,
"signal/brier_reward/centered_abs_mean": 0.13958249241113663,
"signal/brier_reward/group_std_mean": 0.17782102823257445,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01744781155139208,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01744781155139208,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0446560338139534,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06761002168059349,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005582004226744175,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005582004226744175,
"signal/format_reward/centered_abs_mean": 0.021869575139135122,
"signal/format_reward/group_std_mean": 0.04040019139647484,
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010934787569567561,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010934787569567561,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001572295231744647,
"signal/frontier_aurc_reward/group_std_mean": 0.002514668833464384,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8144082898506893e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8144082898506893e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12204509526491165,
"signal/frontier_coverage_1/group_std_mean": 0.16676346063613892,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_10/centered_abs_mean": 0.12204509526491165,
"signal/frontier_coverage_10/group_std_mean": 0.16676346063613892,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_15/centered_abs_mean": 0.12204509526491165,
"signal/frontier_coverage_15/group_std_mean": 0.16676346063613892,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_20/centered_abs_mean": 0.12204509526491165,
"signal/frontier_coverage_20/group_std_mean": 0.16676346063613892,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_25/centered_abs_mean": 0.12204509526491165,
"signal/frontier_coverage_25/group_std_mean": 0.16676346063613892,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_5/centered_abs_mean": 0.12204509526491165,
"signal/frontier_coverage_5/group_std_mean": 0.16676346063613892,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002184607065282762,
"signal/frontier_ece_reward/centered_abs_mean": 0.022244375944137574,
"signal/frontier_ece_reward/group_std_mean": 0.028506366163492204,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027805469930171967,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027805469930171967,
"step": 115
},
{
"calibration/aurc": 0.2672984725852937,
"calibration/batch_distribution_entropy": 0.8792197933707804,
"calibration/buffer_distribution_entropy": 0.7740401221561556,
"calibration/confidence_entropy": 0.430121035681503,
"calibration/coverage@0%": 0.027200066755723284,
"calibration/coverage@1%": 0.027200066755723284,
"calibration/coverage@10%": 0.2044821067474349,
"calibration/coverage@15%": 0.42163062243374394,
"calibration/coverage@20%": 0.5291554748877976,
"calibration/coverage@25%": 0.5711437336692795,
"calibration/coverage@30%": 0.5952792553191489,
"calibration/coverage@5%": 0.13044019833467066,
"calibration/ece": 0.15999251951129834,
"calibration/mean_confidence": 0.6908585197289003,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013281250000000022,
"completions/max_length": 3213.6,
"completions/max_terminated_length": 3213.6,
"completions/mean_length": 733.0212890625,
"completions/mean_terminated_length": 742.8743530273438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 226.0,
"epoch": 0.28799640004499943,
"grad_norm": 0.00029223994351923466,
"learning_rate": 2.6506024096385547e-06,
"loss": -0.012,
"num_tokens": 245170551.0,
"reward": 1.0578627586364746,
"reward_std": 0.13152444064617158,
"rewards/accuracy_reward": 0.6881944417953492,
"rewards/brier_reward": 0.8041746258735657,
"rewards/confidence_uniqueness_reward": 0.9153994798660279,
"rewards/format_reward": 0.9867187380790711,
"rewards/frontier_aurc_reward": -0.0015686721657402814,
"rewards/frontier_coverage_1": 0.027476230938918888,
"rewards/frontier_coverage_10": 0.027476230938918888,
"rewards/frontier_coverage_15": 0.027476230938918888,
"rewards/frontier_coverage_20": 0.027476230938918888,
"rewards/frontier_coverage_25": 0.027476230938918888,
"rewards/frontier_coverage_5": 0.027476230938918888,
"rewards/frontier_ece_reward": 0.020292357727885246,
"signal/accuracy_reward/centered_abs_mean": 0.1541992172598839,
"signal/accuracy_reward/group_std_mean": 0.20872304141521453,
"signal/accuracy_reward/group_zero_std_frac": 0.39166666865348815,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07709960862994195,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07709960862994195,
"signal/advantage_abs_mean": 0.0929091677069664,
"signal/advantage_pre_scale_abs_mean": 0.0929091677069664,
"signal/advantage_pre_scale_std": 0.16713809072971345,
"signal/advantage_std": 0.16713809072971345,
"signal/brier_reward/centered_abs_mean": 0.14138388335704805,
"signal/brier_reward/group_std_mean": 0.183754500746727,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017672985419631006,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017672985419631006,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04437449499964714,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06984723061323166,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005546811874955893,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005546811874955893,
"signal/format_reward/centered_abs_mean": 0.02253146693110466,
"signal/format_reward/group_std_mean": 0.044410817325115204,
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01126573346555233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01126573346555233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001606982573866844,
"signal/frontier_aurc_reward/group_std_mean": 0.0025633119512349367,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.876498801924754e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.876498801924754e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13902547657489778,
"signal/frontier_coverage_1/group_std_mean": 0.18895367681980133,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_10/centered_abs_mean": 0.13902547657489778,
"signal/frontier_coverage_10/group_std_mean": 0.18895367681980133,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_15/centered_abs_mean": 0.13902547657489778,
"signal/frontier_coverage_15/group_std_mean": 0.18895367681980133,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_20/centered_abs_mean": 0.13902547657489778,
"signal/frontier_coverage_20/group_std_mean": 0.18895367681980133,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_25/centered_abs_mean": 0.13902547657489778,
"signal/frontier_coverage_25/group_std_mean": 0.18895367681980133,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_5/centered_abs_mean": 0.13902547657489778,
"signal/frontier_coverage_5/group_std_mean": 0.18895367681980133,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024885560385882856,
"signal/frontier_ece_reward/centered_abs_mean": 0.022856369987130164,
"signal/frontier_ece_reward/group_std_mean": 0.02880855239927769,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028570462483912705,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028570462483912705,
"step": 120
},
{
"calibration/aurc": 0.1551460457832317,
"calibration/batch_distribution_entropy": 0.7586701247838616,
"calibration/buffer_distribution_entropy": 0.7802119276107362,
"calibration/confidence_entropy": 0.36355490124193934,
"calibration/coverage@0%": 0.014926286768201092,
"calibration/coverage@1%": 0.014926286768201092,
"calibration/coverage@10%": 0.40697873647633714,
"calibration/coverage@15%": 0.5172635027475139,
"calibration/coverage@20%": 0.7615805455715663,
"calibration/coverage@25%": 0.8873517883711664,
"calibration/coverage@30%": 0.9523954341064982,
"calibration/coverage@5%": 0.1930317832221018,
"calibration/ece": 0.10565811367996707,
"calibration/mean_confidence": 0.7530231723896744,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014149305555555557,
"completions/max_length": 3340.4,
"completions/max_terminated_length": 3340.4,
"completions/mean_length": 733.7210815429687,
"completions/mean_terminated_length": 744.2840087890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 227.8,
"epoch": 0.2999962500468744,
"grad_norm": 0.00039019520045258105,
"learning_rate": 2.5e-06,
"loss": -0.0142,
"num_tokens": 256740682.0,
"reward": 1.0580396413803101,
"reward_std": 0.13579430282115937,
"rewards/accuracy_reward": 0.6907986164093017,
"rewards/brier_reward": 0.8093456387519836,
"rewards/confidence_uniqueness_reward": 0.8919880509376525,
"rewards/format_reward": 0.985850703716278,
"rewards/frontier_aurc_reward": -0.0017035908997058869,
"rewards/frontier_coverage_1": 0.03645942322909832,
"rewards/frontier_coverage_10": 0.03645942322909832,
"rewards/frontier_coverage_15": 0.03645942322909832,
"rewards/frontier_coverage_20": 0.03645942322909832,
"rewards/frontier_coverage_25": 0.03645942322909832,
"rewards/frontier_coverage_5": 0.03645942322909832,
"rewards/frontier_ece_reward": 0.025304096192121504,
"signal/accuracy_reward/centered_abs_mean": 0.15849609375,
"signal/accuracy_reward/group_std_mean": 0.2060663789510727,
"signal/accuracy_reward/group_zero_std_frac": 0.4222222208976746,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.079248046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.079248046875,
"signal/advantage_abs_mean": 0.09873658120632171,
"signal/advantage_pre_scale_abs_mean": 0.09873658120632171,
"signal/advantage_pre_scale_std": 0.17506541907787324,
"signal/advantage_std": 0.17506541907787324,
"signal/brier_reward/centered_abs_mean": 0.1506500333547592,
"signal/brier_reward/group_std_mean": 0.19419657588005065,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0188312541693449,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0188312541693449,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05950758457183838,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08664509057998657,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007438448071479797,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007438448071479797,
"signal/format_reward/centered_abs_mean": 0.02527669258415699,
"signal/format_reward/group_std_mean": 0.0472879096865654,
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012638346292078495,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012638346292078495,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022916203830391168,
"signal/frontier_aurc_reward/group_std_mean": 0.0036330488976091145,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.102000239072368e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.102000239072368e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13295071721076965,
"signal/frontier_coverage_1/group_std_mean": 0.18857296407222748,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_10/centered_abs_mean": 0.13295071721076965,
"signal/frontier_coverage_10/group_std_mean": 0.18857296407222748,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_15/centered_abs_mean": 0.13295071721076965,
"signal/frontier_coverage_15/group_std_mean": 0.18857296407222748,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_20/centered_abs_mean": 0.13295071721076965,
"signal/frontier_coverage_20/group_std_mean": 0.18857296407222748,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_25/centered_abs_mean": 0.13295071721076965,
"signal/frontier_coverage_25/group_std_mean": 0.18857296407222748,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_5/centered_abs_mean": 0.13295071721076965,
"signal/frontier_coverage_5/group_std_mean": 0.18857296407222748,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00237981784157455,
"signal/frontier_ece_reward/centered_abs_mean": 0.024529390409588812,
"signal/frontier_ece_reward/group_std_mean": 0.03039325512945652,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030661738011986015,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030661738011986015,
"step": 125
},
{
"calibration/aurc": 0.21937393087793167,
"calibration/batch_distribution_entropy": 0.7982473467783474,
"calibration/buffer_distribution_entropy": 0.7814824550767334,
"calibration/confidence_entropy": 0.3547674800413071,
"calibration/coverage@0%": 0.010066104916730503,
"calibration/coverage@1%": 0.010066104916730503,
"calibration/coverage@10%": 0.1716406920783105,
"calibration/coverage@15%": 0.31217969811753826,
"calibration/coverage@20%": 0.51458351029786,
"calibration/coverage@25%": 0.6728991822165153,
"calibration/coverage@30%": 0.8798521895852263,
"calibration/coverage@5%": 0.05594028905782202,
"calibration/ece": 0.13592891891689865,
"calibration/mean_confidence": 0.713633518271023,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015364583333333326,
"completions/max_length": 3396.2,
"completions/max_terminated_length": 3396.2,
"completions/mean_length": 766.4459350585937,
"completions/mean_terminated_length": 778.4260375976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.4,
"epoch": 0.3119961000487494,
"grad_norm": 0.0004305084585212171,
"learning_rate": 2.349397590361446e-06,
"loss": -0.0145,
"num_tokens": 268694939.0,
"reward": 1.0496493101119995,
"reward_std": 0.1406361937522888,
"rewards/accuracy_reward": 0.6756076455116272,
"rewards/brier_reward": 0.800056254863739,
"rewards/confidence_uniqueness_reward": 0.8977401375770568,
"rewards/format_reward": 0.9846354126930237,
"rewards/frontier_aurc_reward": -0.001901687984354794,
"rewards/frontier_coverage_1": 0.0432504091411829,
"rewards/frontier_coverage_10": 0.0432504091411829,
"rewards/frontier_coverage_15": 0.0432504091411829,
"rewards/frontier_coverage_20": 0.0432504091411829,
"rewards/frontier_coverage_25": 0.0432504091411829,
"rewards/frontier_coverage_5": 0.0432504091411829,
"rewards/frontier_ece_reward": 0.02153747119009495,
"signal/accuracy_reward/centered_abs_mean": 0.16565212607383728,
"signal/accuracy_reward/group_std_mean": 0.21800636351108552,
"signal/accuracy_reward/group_zero_std_frac": 0.37777777910232546,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08282606303691864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08282606303691864,
"signal/advantage_abs_mean": 0.1018408328294754,
"signal/advantage_pre_scale_abs_mean": 0.1018408328294754,
"signal/advantage_pre_scale_std": 0.17831650972366334,
"signal/advantage_std": 0.17831650972366334,
"signal/brier_reward/centered_abs_mean": 0.15308336317539215,
"signal/brier_reward/group_std_mean": 0.19842869639396668,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019135420396924018,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019135420396924018,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0566624753177166,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08453233689069747,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007082809414714575,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007082809414714575,
"signal/format_reward/centered_abs_mean": 0.0266004778444767,
"signal/format_reward/group_std_mean": 0.049190875887870786,
"signal/format_reward/group_zero_std_frac": 0.8055555820465088,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01330023892223835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01330023892223835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002477661520242691,
"signal/frontier_aurc_reward/group_std_mean": 0.0040058012586086985,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.435014052432962e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.435014052432962e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14277483373880387,
"signal/frontier_coverage_1/group_std_mean": 0.19749794900417328,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_10/centered_abs_mean": 0.14277483373880387,
"signal/frontier_coverage_10/group_std_mean": 0.19749794900417328,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_15/centered_abs_mean": 0.14277483373880387,
"signal/frontier_coverage_15/group_std_mean": 0.19749794900417328,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_20/centered_abs_mean": 0.14277483373880387,
"signal/frontier_coverage_20/group_std_mean": 0.19749794900417328,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_25/centered_abs_mean": 0.14277483373880387,
"signal/frontier_coverage_25/group_std_mean": 0.19749794900417328,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_5/centered_abs_mean": 0.14277483373880387,
"signal/frontier_coverage_5/group_std_mean": 0.19749794900417328,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025556694716215135,
"signal/frontier_ece_reward/centered_abs_mean": 0.023040265217423438,
"signal/frontier_ece_reward/group_std_mean": 0.028788076341152193,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028800331521779297,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028800331521779297,
"step": 130
},
{
"calibration/aurc": 0.21178649539996738,
"calibration/batch_distribution_entropy": 0.8621768605224684,
"calibration/buffer_distribution_entropy": 0.7861469496776874,
"calibration/confidence_entropy": 0.41761171099367456,
"calibration/coverage@0%": 0.011613063660477454,
"calibration/coverage@1%": 0.10067556366047745,
"calibration/coverage@10%": 0.26419098143236075,
"calibration/coverage@15%": 0.37293739888807675,
"calibration/coverage@20%": 0.548718324485379,
"calibration/coverage@25%": 0.6227433237201573,
"calibration/coverage@30%": 0.7633802420489727,
"calibration/coverage@5%": 0.19650889699381077,
"calibration/ece": 0.1498688341904781,
"calibration/mean_confidence": 0.6908206350495991,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3772.4,
"completions/max_terminated_length": 3772.4,
"completions/mean_length": 758.24306640625,
"completions/mean_terminated_length": 767.1976684570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 205.2,
"epoch": 0.32399595005062437,
"grad_norm": 0.00038663134910166264,
"learning_rate": 2.1987951807228917e-06,
"loss": -0.0098,
"num_tokens": 280522923.0,
"reward": 1.0594348669052125,
"reward_std": 0.12770668268203736,
"rewards/accuracy_reward": 0.6868055582046508,
"rewards/brier_reward": 0.8069122552871704,
"rewards/confidence_uniqueness_reward": 0.9251725912094116,
"rewards/format_reward": 0.9881944537162781,
"rewards/frontier_aurc_reward": -0.0012947394163347781,
"rewards/frontier_coverage_1": 0.033542437851428984,
"rewards/frontier_coverage_10": 0.033542437851428984,
"rewards/frontier_coverage_15": 0.033542437851428984,
"rewards/frontier_coverage_20": 0.033542437851428984,
"rewards/frontier_coverage_25": 0.033542437851428984,
"rewards/frontier_coverage_5": 0.033542437851428984,
"rewards/frontier_ece_reward": 0.014759739115834235,
"signal/accuracy_reward/centered_abs_mean": 0.15372178852558135,
"signal/accuracy_reward/group_std_mean": 0.20507141947746277,
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07686089426279068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07686089426279068,
"signal/advantage_abs_mean": 0.09132870435714721,
"signal/advantage_pre_scale_abs_mean": 0.09132870435714721,
"signal/advantage_pre_scale_std": 0.16071320176124573,
"signal/advantage_std": 0.16071320176124573,
"signal/brier_reward/centered_abs_mean": 0.14716649651527405,
"signal/brier_reward/group_std_mean": 0.19055280685424805,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018395812064409257,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018395812064409257,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038749721646308896,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06294624656438827,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004843715205788612,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004843715205788612,
"signal/format_reward/centered_abs_mean": 0.02109375,
"signal/format_reward/group_std_mean": 0.042469137161970136,
"signal/format_reward/group_zero_std_frac": 0.8166666746139526,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013480266556143762,
"signal/frontier_aurc_reward/group_std_mean": 0.0022415920160710812,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4129677694872952e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4129677694872952e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16463718116283416,
"signal/frontier_coverage_1/group_std_mean": 0.22296231091022492,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_10/centered_abs_mean": 0.16463718116283416,
"signal/frontier_coverage_10/group_std_mean": 0.22296231091022492,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_15/centered_abs_mean": 0.16463718116283416,
"signal/frontier_coverage_15/group_std_mean": 0.22296231091022492,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_20/centered_abs_mean": 0.16463718116283416,
"signal/frontier_coverage_20/group_std_mean": 0.22296231091022492,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_25/centered_abs_mean": 0.16463718116283416,
"signal/frontier_coverage_25/group_std_mean": 0.22296231091022492,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_5/centered_abs_mean": 0.16463718116283416,
"signal/frontier_coverage_5/group_std_mean": 0.22296231091022492,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002947005443274975,
"signal/frontier_ece_reward/centered_abs_mean": 0.019040508940815926,
"signal/frontier_ece_reward/group_std_mean": 0.02431493140757084,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002380063617601991,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002380063617601991,
"step": 135
},
{
"calibration/aurc": 0.15735400009669226,
"calibration/batch_distribution_entropy": 0.867213508049151,
"calibration/buffer_distribution_entropy": 0.7960778689126401,
"calibration/confidence_entropy": 0.43905768456117134,
"calibration/coverage@0%": 0.031665669247906085,
"calibration/coverage@1%": 0.031665669247906085,
"calibration/coverage@10%": 0.36938394329841706,
"calibration/coverage@15%": 0.577238562485273,
"calibration/coverage@20%": 0.7212511045241308,
"calibration/coverage@25%": 0.8655174528200844,
"calibration/coverage@30%": 0.9547358513805883,
"calibration/coverage@5%": 0.06221147821476769,
"calibration/ece": 0.10685238076566583,
"calibration/mean_confidence": 0.6762039937526211,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014236111111111093,
"completions/max_length": 3602.8,
"completions/max_terminated_length": 3602.8,
"completions/mean_length": 773.11328125,
"completions/mean_terminated_length": 784.2122314453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 239.6,
"epoch": 0.33599580005249935,
"grad_norm": 0.00038202741416171193,
"learning_rate": 2.0481927710843377e-06,
"loss": -0.0127,
"num_tokens": 292533412.0,
"reward": 1.0550405263900757,
"reward_std": 0.12688146978616716,
"rewards/accuracy_reward": 0.6841145992279053,
"rewards/brier_reward": 0.8033087968826294,
"rewards/confidence_uniqueness_reward": 0.9210085034370422,
"rewards/format_reward": 0.9856770753860473,
"rewards/frontier_aurc_reward": -0.0011435442487709225,
"rewards/frontier_coverage_1": 0.028446093632373957,
"rewards/frontier_coverage_10": 0.028446093632373957,
"rewards/frontier_coverage_15": 0.028446093632373957,
"rewards/frontier_coverage_20": 0.028446093632373957,
"rewards/frontier_coverage_25": 0.027299534215126188,
"rewards/frontier_coverage_5": 0.028446093632373957,
"rewards/frontier_ece_reward": 0.012727185152471066,
"signal/accuracy_reward/centered_abs_mean": 0.1436794728040695,
"signal/accuracy_reward/group_std_mean": 0.19504518210887908,
"signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07183973640203475,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07183973640203475,
"signal/advantage_abs_mean": 0.09040538519620896,
"signal/advantage_pre_scale_abs_mean": 0.09040538519620896,
"signal/advantage_pre_scale_std": 0.1635303646326065,
"signal/advantage_std": 0.1635303646326065,
"signal/brier_reward/centered_abs_mean": 0.14187564551830292,
"signal/brier_reward/group_std_mean": 0.18244777321815492,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017734455689787865,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017734455689787865,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.041977598518133166,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06911587193608285,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005247199814766646,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005247199814766646,
"signal/format_reward/centered_abs_mean": 0.02453884594142437,
"signal/format_reward/group_std_mean": 0.04877747595310211,
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012269422970712185,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.012269422970712185,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011516727041453122,
"signal/frontier_aurc_reward/group_std_mean": 0.001979802688583732,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0614940876839682e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0614940876839682e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1493788868188858,
"signal/frontier_coverage_1/group_std_mean": 0.20525516271591188,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_10/centered_abs_mean": 0.1493788868188858,
"signal/frontier_coverage_10/group_std_mean": 0.20525516271591188,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_15/centered_abs_mean": 0.1493788868188858,
"signal/frontier_coverage_15/group_std_mean": 0.20525516271591188,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_20/centered_abs_mean": 0.1493788868188858,
"signal/frontier_coverage_20/group_std_mean": 0.20525516271591188,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_25/centered_abs_mean": 0.14764404296875,
"signal/frontier_coverage_25/group_std_mean": 0.20283401310443877,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002642828319221735,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002642828319221735,
"signal/frontier_coverage_5/centered_abs_mean": 0.1493788868188858,
"signal/frontier_coverage_5/group_std_mean": 0.20525516271591188,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026738820131868126,
"signal/frontier_ece_reward/centered_abs_mean": 0.016555101424455643,
"signal/frontier_ece_reward/group_std_mean": 0.021539781242609024,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020693876780569554,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020693876780569554,
"step": 140
},
{
"calibration/aurc": 0.17760189147420782,
"calibration/batch_distribution_entropy": 0.8614810747703359,
"calibration/buffer_distribution_entropy": 0.8124016039305744,
"calibration/confidence_entropy": 0.4269929605650483,
"calibration/coverage@0%": 0.04225979049059177,
"calibration/coverage@1%": 0.04225979049059177,
"calibration/coverage@10%": 0.29135738789953486,
"calibration/coverage@15%": 0.5114959489018647,
"calibration/coverage@20%": 0.6514218042908502,
"calibration/coverage@25%": 0.7691949016487275,
"calibration/coverage@30%": 0.8515852075218829,
"calibration/coverage@5%": 0.07188663538059455,
"calibration/ece": 0.11239056071129601,
"calibration/mean_confidence": 0.6774057122905555,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014756944444444442,
"completions/max_length": 3140.8,
"completions/max_terminated_length": 3140.8,
"completions/mean_length": 768.288916015625,
"completions/mean_terminated_length": 779.8587036132812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 228.8,
"epoch": 0.34799565005437433,
"grad_norm": 0.0003542336344253272,
"learning_rate": 1.8975903614457832e-06,
"loss": -0.0128,
"num_tokens": 304448708.0,
"reward": 1.0733104705810548,
"reward_std": 0.12197275906801223,
"rewards/accuracy_reward": 0.7184027671813965,
"rewards/brier_reward": 0.8237447381019593,
"rewards/confidence_uniqueness_reward": 0.9106087207794189,
"rewards/format_reward": 0.98515625,
"rewards/frontier_aurc_reward": -0.0011269306181930006,
"rewards/frontier_coverage_1": 0.02765751425176859,
"rewards/frontier_coverage_10": 0.02765751425176859,
"rewards/frontier_coverage_15": 0.02765751425176859,
"rewards/frontier_coverage_20": 0.02765751425176859,
"rewards/frontier_coverage_25": 0.029555964469909667,
"rewards/frontier_coverage_5": 0.02765751425176859,
"rewards/frontier_ece_reward": 0.014020322076976299,
"signal/accuracy_reward/centered_abs_mean": 0.1314453125,
"signal/accuracy_reward/group_std_mean": 0.1816246747970581,
"signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06572265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06572265625,
"signal/advantage_abs_mean": 0.08338052183389663,
"signal/advantage_pre_scale_abs_mean": 0.08338052183389663,
"signal/advantage_pre_scale_std": 0.16272049248218537,
"signal/advantage_std": 0.16272049248218537,
"signal/brier_reward/centered_abs_mean": 0.12779132276773453,
"signal/brier_reward/group_std_mean": 0.16802054941654204,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015973915345966816,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015973915345966816,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.046132729202508924,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07453691065311432,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0057665911503136155,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0057665911503136155,
"signal/format_reward/centered_abs_mean": 0.02614474855363369,
"signal/format_reward/group_std_mean": 0.05145877227187157,
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013072374276816845,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013072374276816845,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001297543675173074,
"signal/frontier_aurc_reward/group_std_mean": 0.0022464465117082,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3226030680234545e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3226030680234545e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1275523856282234,
"signal/frontier_coverage_1/group_std_mean": 0.17685183584690095,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_10/centered_abs_mean": 0.1275523856282234,
"signal/frontier_coverage_10/group_std_mean": 0.17685183584690095,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_15/centered_abs_mean": 0.1275523856282234,
"signal/frontier_coverage_15/group_std_mean": 0.17685183584690095,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_20/centered_abs_mean": 0.1275523856282234,
"signal/frontier_coverage_20/group_std_mean": 0.17685183584690095,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_25/centered_abs_mean": 0.09737538546323776,
"signal/frontier_coverage_25/group_std_mean": 0.13730859458446504,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017430193023756147,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017430193023756147,
"signal/frontier_coverage_5/centered_abs_mean": 0.1275523856282234,
"signal/frontier_coverage_5/group_std_mean": 0.17685183584690095,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022831874433904887,
"signal/frontier_ece_reward/centered_abs_mean": 0.014937486127018928,
"signal/frontier_ece_reward/group_std_mean": 0.01869678348302841,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001867185765877366,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001867185765877366,
"step": 145
},
{
"calibration/aurc": 0.1711177612994187,
"calibration/batch_distribution_entropy": 0.7979294550533148,
"calibration/buffer_distribution_entropy": 0.8252856931612047,
"calibration/confidence_entropy": 0.3867997528995013,
"calibration/coverage@0%": 0.028857090429138067,
"calibration/coverage@1%": 0.028857090429138067,
"calibration/coverage@10%": 0.39941504381293946,
"calibration/coverage@15%": 0.4653468268344284,
"calibration/coverage@20%": 0.6041595882856419,
"calibration/coverage@25%": 0.7318645921120241,
"calibration/coverage@30%": 0.8313806917894677,
"calibration/coverage@5%": 0.20213240230711796,
"calibration/ece": 0.14017483881446374,
"calibration/mean_confidence": 0.7112044312383862,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666666666675,
"completions/max_length": 3437.8,
"completions/max_terminated_length": 3437.8,
"completions/mean_length": 839.7661499023437,
"completions/mean_terminated_length": 848.5783813476562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 249.6,
"epoch": 0.3599955000562493,
"grad_norm": 0.00046212406596168876,
"learning_rate": 1.7469879518072292e-06,
"loss": -0.008,
"num_tokens": 317233150.0,
"reward": 1.065112328529358,
"reward_std": 0.12633269131183625,
"rewards/accuracy_reward": 0.70234375,
"rewards/brier_reward": 0.8162197589874267,
"rewards/confidence_uniqueness_reward": 0.8988808989524841,
"rewards/format_reward": 0.9894965291023254,
"rewards/frontier_aurc_reward": -0.0015927208121865987,
"rewards/frontier_coverage_1": 0.03101687040179968,
"rewards/frontier_coverage_10": 0.03101687040179968,
"rewards/frontier_coverage_15": 0.03101687040179968,
"rewards/frontier_coverage_20": 0.031029899418354035,
"rewards/frontier_coverage_25": 0.0334394596517086,
"rewards/frontier_coverage_5": 0.03101687040179968,
"rewards/frontier_ece_reward": 0.011666352301836014,
"signal/accuracy_reward/centered_abs_mean": 0.15368380695581435,
"signal/accuracy_reward/group_std_mean": 0.1997191309928894,
"signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07684190347790718,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07684190347790718,
"signal/advantage_abs_mean": 0.09196306616067887,
"signal/advantage_pre_scale_abs_mean": 0.09196306616067887,
"signal/advantage_pre_scale_std": 0.165494641661644,
"signal/advantage_std": 0.165494641661644,
"signal/brier_reward/centered_abs_mean": 0.13116701394319535,
"signal/brier_reward/group_std_mean": 0.17333021759986877,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01639587674289942,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01639587674289942,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.046421286463737485,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07029250860214234,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005802660807967186,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005802660807967186,
"signal/format_reward/centered_abs_mean": 0.01837565079331398,
"signal/format_reward/group_std_mean": 0.0379862654954195,
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00918782539665699,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00918782539665699,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020536962430924175,
"signal/frontier_aurc_reward/group_std_mean": 0.003437606617808342,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.67611584806582e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.67611584806582e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11324008107185364,
"signal/frontier_coverage_1/group_std_mean": 0.1635311007499695,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020269973436370494,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020269973436370494,
"signal/frontier_coverage_10/centered_abs_mean": 0.11324008107185364,
"signal/frontier_coverage_10/group_std_mean": 0.1635311007499695,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020269973436370494,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020269973436370494,
"signal/frontier_coverage_15/centered_abs_mean": 0.11324008107185364,
"signal/frontier_coverage_15/group_std_mean": 0.1635311007499695,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020269973436370494,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020269973436370494,
"signal/frontier_coverage_20/centered_abs_mean": 0.10615915805101395,
"signal/frontier_coverage_20/group_std_mean": 0.15414920151233674,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019002489047124983,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019002489047124983,
"signal/frontier_coverage_25/centered_abs_mean": 0.060843870788812635,
"signal/frontier_coverage_25/group_std_mean": 0.08873464614152908,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001089105277787894,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001089105277787894,
"signal/frontier_coverage_5/centered_abs_mean": 0.11324008107185364,
"signal/frontier_coverage_5/group_std_mean": 0.1635311007499695,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020269973436370494,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020269973436370494,
"signal/frontier_ece_reward/centered_abs_mean": 0.01169421263039112,
"signal/frontier_ece_reward/group_std_mean": 0.014634997583925724,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00146177657879889,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00146177657879889,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_calibration/aurc": 0.16175791904643544,
"eval_calibration/batch_distribution_entropy": 0.7352031129148272,
"eval_calibration/buffer_distribution_entropy": 0.832478781306993,
"eval_calibration/confidence_entropy": 0.39508010082462275,
"eval_calibration/coverage@0%": 0.19808467741935484,
"eval_calibration/coverage@1%": 0.19808467741935484,
"eval_calibration/coverage@10%": 0.47211021505376344,
"eval_calibration/coverage@15%": 0.5816532258064516,
"eval_calibration/coverage@20%": 0.7856182795698925,
"eval_calibration/coverage@25%": 0.8489583333333334,
"eval_calibration/coverage@30%": 0.9010416666666666,
"eval_calibration/coverage@5%": 0.21370967741935484,
"eval_calibration/ece": 0.16735424483653735,
"eval_calibration/mean_confidence": 0.749357968833574,
"eval_completions/clipped_ratio": 0.006944444444444457,
"eval_completions/max_length": 2777.6666666666665,
"eval_completions/max_terminated_length": 2777.6666666666665,
"eval_completions/mean_length": 823.3238016764323,
"eval_completions/mean_terminated_length": 829.1297912597656,
"eval_completions/min_length": 111.33333333333333,
"eval_completions/min_terminated_length": 290.0,
"eval_loss": 0.0,
"eval_num_tokens": 317233150.0,
"eval_reward": 1.0572884281476338,
"eval_reward_std": 0.25147593518098194,
"eval_rewards/accuracy_reward": 0.6996527711550394,
"eval_rewards/brier_reward": 0.8065233925978342,
"eval_rewards/confidence_uniqueness_reward": 0.8552062610785166,
"eval_rewards/format_reward": 0.9921875099341074,
"eval_rewards/frontier_aurc_reward": -0.00195368086375917,
"eval_rewards/frontier_coverage_1": 0.021662883625443403,
"eval_rewards/frontier_coverage_10": 0.021662883625443403,
"eval_rewards/frontier_coverage_15": 0.021662883625443403,
"eval_rewards/frontier_coverage_20": 0.02181592263514176,
"eval_rewards/frontier_coverage_25": 0.035129744869967304,
"eval_rewards/frontier_coverage_5": 0.021662883625443403,
"eval_rewards/frontier_ece_reward": 0.008933214703574777,
"eval_runtime": 198.6148,
"eval_samples_per_second": 5.035,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4021267344554265,
"eval_signal/accuracy_reward/group_std_mean": 0.4533983866373698,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20106336722771326,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20106336722771326,
"eval_signal/advantage_abs_mean": 0.2142608513434728,
"eval_signal/advantage_pre_scale_abs_mean": 0.2142608513434728,
"eval_signal/advantage_pre_scale_std": 0.2503946051001549,
"eval_signal/advantage_std": 0.2503946051001549,
"eval_signal/brier_reward/centered_abs_mean": 0.22845095644394556,
"eval_signal/brier_reward/group_std_mean": 0.28837570548057556,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028556369555493195,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.028556369555493195,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06686499528586864,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09637108817696571,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00835812441073358,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00835812441073358,
"eval_signal/format_reward/centered_abs_mean": 0.015136718594779571,
"eval_signal/format_reward/group_std_mean": 0.04419417337824901,
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359297389786,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359297389786,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003255114386168619,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0065320210220913095,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.82665494827476e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.82665494827476e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.15939685453971228,
"eval_signal/frontier_coverage_1/group_std_mean": 0.28508878250916797,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028532035648822784,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028532035648822784,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.15939685453971228,
"eval_signal/frontier_coverage_10/group_std_mean": 0.28508878250916797,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028532035648822784,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028532035648822784,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.15939685453971228,
"eval_signal/frontier_coverage_15/group_std_mean": 0.28508878250916797,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028532035648822784,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028532035648822784,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.12750840187072754,
"eval_signal/frontier_coverage_20/group_std_mean": 0.23698227355877557,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022824003632801273,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022824003632801273,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.07293465360999107,
"eval_signal/frontier_coverage_25/group_std_mean": 0.12300009404619534,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013055303134024143,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013055303134024143,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.15939685453971228,
"eval_signal/frontier_coverage_5/group_std_mean": 0.28508878250916797,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028532035648822784,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028532035648822784,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013653319949905077,
"eval_signal/frontier_ece_reward/group_std_mean": 0.017306591384112835,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017066649937381346,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017066649937381346,
"eval_steps_per_second": 0.03,
"step": 150
},
{
"calibration/aurc": 0.15804363675783265,
"calibration/batch_distribution_entropy": 0.8399256406752158,
"calibration/buffer_distribution_entropy": 0.8364387748440253,
"calibration/confidence_entropy": 0.4143819803032832,
"calibration/coverage@0%": 0.014081538294168843,
"calibration/coverage@1%": 0.014081538294168843,
"calibration/coverage@10%": 0.4197884627652546,
"calibration/coverage@15%": 0.5361398299340652,
"calibration/coverage@20%": 0.6640488229008608,
"calibration/coverage@25%": 0.8362543516100958,
"calibration/coverage@30%": 0.9181149369016536,
"calibration/coverage@5%": 0.0701024807025458,
"calibration/ece": 0.10372717760673841,
"calibration/mean_confidence": 0.7007285385392891,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.007725694444444442,
"completions/max_length": 3224.8,
"completions/max_terminated_length": 3224.8,
"completions/mean_length": 815.9788208007812,
"completions/mean_terminated_length": 822.4533203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 248.4,
"epoch": 0.3719953500581243,
"grad_norm": 0.0004315991827752441,
"learning_rate": 1.5963855421686747e-06,
"loss": -0.0049,
"num_tokens": 329740938.0,
"reward": 1.0920594453811645,
"reward_std": 0.11862210929393768,
"rewards/accuracy_reward": 0.7427083373069763,
"rewards/brier_reward": 0.8469986200332642,
"rewards/confidence_uniqueness_reward": 0.9098951697349549,
"rewards/format_reward": 0.9921874880790711,
"rewards/frontier_aurc_reward": -0.001068349787965417,
"rewards/frontier_coverage_1": 0.032322213798761365,
"rewards/frontier_coverage_10": 0.032322213798761365,
"rewards/frontier_coverage_15": 0.032322213798761365,
"rewards/frontier_coverage_20": 0.033581113815307616,
"rewards/frontier_coverage_25": 0.053568636626005174,
"rewards/frontier_coverage_5": 0.032322213798761365,
"rewards/frontier_ece_reward": 0.00915743401274085,
"signal/accuracy_reward/centered_abs_mean": 0.15378689169883727,
"signal/accuracy_reward/group_std_mean": 0.2032044380903244,
"signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07689344584941864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07689344584941864,
"signal/advantage_abs_mean": 0.08521311953663827,
"signal/advantage_pre_scale_abs_mean": 0.08521311953663827,
"signal/advantage_pre_scale_std": 0.15650778114795685,
"signal/advantage_std": 0.15650778114795685,
"signal/brier_reward/centered_abs_mean": 0.11616129875183105,
"signal/brier_reward/group_std_mean": 0.1569095641374588,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014520162343978881,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014520162343978881,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03967732265591621,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05846917554736138,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004959665331989526,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004959665331989526,
"signal/format_reward/centered_abs_mean": 0.013953992887400091,
"signal/format_reward/group_std_mean": 0.02798333503305912,
"signal/format_reward/group_zero_std_frac": 0.8805555462837219,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0069769964437000455,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0069769964437000455,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014973450219258667,
"signal/frontier_aurc_reward/group_std_mean": 0.0027009368874132632,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6802473803400063e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6802473803400063e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12041537314653397,
"signal/frontier_coverage_1/group_std_mean": 0.1706594407558441,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021554350852966307,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021554350852966307,
"signal/frontier_coverage_10/centered_abs_mean": 0.12041537314653397,
"signal/frontier_coverage_10/group_std_mean": 0.1706594407558441,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021554350852966307,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021554350852966307,
"signal/frontier_coverage_15/centered_abs_mean": 0.12041537314653397,
"signal/frontier_coverage_15/group_std_mean": 0.1706594407558441,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021554350852966307,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021554350852966307,
"signal/frontier_coverage_20/centered_abs_mean": 0.08516337871551513,
"signal/frontier_coverage_20/group_std_mean": 0.12315509170293808,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015244244365021586,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015244244365021586,
"signal/frontier_coverage_25/centered_abs_mean": 0.05519420728087425,
"signal/frontier_coverage_25/group_std_mean": 0.07446658313274383,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009879762423224748,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009879762423224748,
"signal/frontier_coverage_5/centered_abs_mean": 0.12041537314653397,
"signal/frontier_coverage_5/group_std_mean": 0.1706594407558441,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021554350852966307,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021554350852966307,
"signal/frontier_ece_reward/centered_abs_mean": 0.008705221116542816,
"signal/frontier_ece_reward/group_std_mean": 0.011256015487015247,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001088152639567852,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001088152639567852,
"step": 155
},
{
"calibration/aurc": 0.1396131852381405,
"calibration/batch_distribution_entropy": 0.8001701977514258,
"calibration/buffer_distribution_entropy": 0.8432206775920253,
"calibration/confidence_entropy": 0.42837628908071357,
"calibration/coverage@0%": 0.038418853892540475,
"calibration/coverage@1%": 0.038418853892540475,
"calibration/coverage@10%": 0.6409266970960997,
"calibration/coverage@15%": 0.7230105375149384,
"calibration/coverage@20%": 0.7985909217488165,
"calibration/coverage@25%": 0.8646003898635477,
"calibration/coverage@30%": 0.8846560846560847,
"calibration/coverage@5%": 0.18981191832059213,
"calibration/ece": 0.12949856679199157,
"calibration/mean_confidence": 0.7311427370325003,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010590277777777768,
"completions/max_length": 3660.8,
"completions/max_terminated_length": 3660.8,
"completions/mean_length": 833.2264892578125,
"completions/mean_terminated_length": 842.1909423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 230.8,
"epoch": 0.38399520005999926,
"grad_norm": 0.00039733736775815487,
"learning_rate": 1.4457831325301204e-06,
"loss": -0.0093,
"num_tokens": 342427003.0,
"reward": 1.054310917854309,
"reward_std": 0.12334007620811463,
"rewards/accuracy_reward": 0.6795138835906982,
"rewards/brier_reward": 0.8059031248092652,
"rewards/confidence_uniqueness_reward": 0.9150711178779602,
"rewards/format_reward": 0.9894097328186036,
"rewards/frontier_aurc_reward": -0.0016405290691182018,
"rewards/frontier_coverage_1": 0.03431166112422943,
"rewards/frontier_coverage_10": 0.03431166112422943,
"rewards/frontier_coverage_15": 0.03431166112422943,
"rewards/frontier_coverage_20": 0.031892279908061025,
"rewards/frontier_coverage_25": 0.05291588976979256,
"rewards/frontier_coverage_5": 0.03431166112422943,
"rewards/frontier_ece_reward": 0.006254972610622645,
"signal/accuracy_reward/centered_abs_mean": 0.15179036557674408,
"signal/accuracy_reward/group_std_mean": 0.19949381947517394,
"signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07589518278837204,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07589518278837204,
"signal/advantage_abs_mean": 0.08981405347585678,
"signal/advantage_pre_scale_abs_mean": 0.08981405347585678,
"signal/advantage_pre_scale_std": 0.1621391087770462,
"signal/advantage_std": 0.1621391087770462,
"signal/brier_reward/centered_abs_mean": 0.133778178691864,
"signal/brier_reward/group_std_mean": 0.17374806702136994,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016722272336483,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016722272336483,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03996127396821976,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06208924725651741,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00499515924602747,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00499515924602747,
"signal/format_reward/centered_abs_mean": 0.01856553815305233,
"signal/format_reward/group_std_mean": 0.037125248461961746,
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009282769076526165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009282769076526165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018948239739984274,
"signal/frontier_aurc_reward/group_std_mean": 0.0033697550650686027,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.39173486281652e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.39173486281652e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1356187418103218,
"signal/frontier_coverage_1/group_std_mean": 0.18712888658046722,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002427575411275029,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002427575411275029,
"signal/frontier_coverage_10/centered_abs_mean": 0.1356187418103218,
"signal/frontier_coverage_10/group_std_mean": 0.18712888658046722,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002427575411275029,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002427575411275029,
"signal/frontier_coverage_15/centered_abs_mean": 0.1356187418103218,
"signal/frontier_coverage_15/group_std_mean": 0.18712888658046722,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002427575411275029,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002427575411275029,
"signal/frontier_coverage_20/centered_abs_mean": 0.08282427489757538,
"signal/frontier_coverage_20/group_std_mean": 0.1169714629650116,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014825545251369477,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014825545251369477,
"signal/frontier_coverage_25/centered_abs_mean": 0.060303305834531785,
"signal/frontier_coverage_25/group_std_mean": 0.07883718758821487,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010794291738420725,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010794291738420725,
"signal/frontier_coverage_5/centered_abs_mean": 0.1356187418103218,
"signal/frontier_coverage_5/group_std_mean": 0.18712888658046722,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002427575411275029,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002427575411275029,
"signal/frontier_ece_reward/centered_abs_mean": 0.008292005583643913,
"signal/frontier_ece_reward/group_std_mean": 0.010889817215502261,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001036500697955489,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001036500697955489,
"step": 160
},
{
"calibration/aurc": 0.1401170386334239,
"calibration/batch_distribution_entropy": 0.8628322122419168,
"calibration/buffer_distribution_entropy": 0.8477875709184838,
"calibration/confidence_entropy": 0.41931734632897494,
"calibration/coverage@0%": 0.04187046632642211,
"calibration/coverage@1%": 0.04187046632642211,
"calibration/coverage@10%": 0.5120449317449931,
"calibration/coverage@15%": 0.6430785094700009,
"calibration/coverage@20%": 0.7207332569334908,
"calibration/coverage@25%": 0.8257768317559318,
"calibration/coverage@30%": 0.9018360536714092,
"calibration/coverage@5%": 0.29195404140585296,
"calibration/ece": 0.12538457953207308,
"calibration/mean_confidence": 0.6443893716018676,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008680555555555558,
"completions/max_length": 3490.6,
"completions/max_terminated_length": 3490.6,
"completions/mean_length": 873.49384765625,
"completions/mean_terminated_length": 881.2654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 242.8,
"epoch": 0.39599505006187424,
"grad_norm": 0.0004413281276356429,
"learning_rate": 1.2951807228915664e-06,
"loss": -0.007,
"num_tokens": 355628724.0,
"reward": 1.0522673010826111,
"reward_std": 0.11457638144493103,
"rewards/accuracy_reward": 0.668836796283722,
"rewards/brier_reward": 0.8068422317504883,
"rewards/confidence_uniqueness_reward": 0.9226135849952698,
"rewards/format_reward": 0.9913194417953491,
"rewards/frontier_aurc_reward": -0.0013329184614121914,
"rewards/frontier_coverage_1": 0.04676450602710247,
"rewards/frontier_coverage_10": 0.04676450602710247,
"rewards/frontier_coverage_15": 0.04676450602710247,
"rewards/frontier_coverage_20": 0.040921327844262126,
"rewards/frontier_coverage_25": 0.06650637164711952,
"rewards/frontier_coverage_5": 0.04676450602710247,
"rewards/frontier_ece_reward": 0.006077949050813913,
"signal/accuracy_reward/centered_abs_mean": 0.1448296457529068,
"signal/accuracy_reward/group_std_mean": 0.1931760638952255,
"signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0724148228764534,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0724148228764534,
"signal/advantage_abs_mean": 0.08313089311122894,
"signal/advantage_pre_scale_abs_mean": 0.08313089311122894,
"signal/advantage_pre_scale_std": 0.15007005333900453,
"signal/advantage_std": 0.15007005333900453,
"signal/brier_reward/centered_abs_mean": 0.13480945378541948,
"signal/brier_reward/group_std_mean": 0.17508352398872376,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016851181723177434,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016851181723177434,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03558523468673229,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05330366343259811,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004448154335841536,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004448154335841536,
"signal/format_reward/centered_abs_mean": 0.014876301772892475,
"signal/format_reward/group_std_mean": 0.028501024469733238,
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007438150886446238,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007438150886446238,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014591423678211868,
"signal/frontier_aurc_reward/group_std_mean": 0.0024731668643653395,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6118645473616196e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6118645473616196e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.155775585770607,
"signal/frontier_coverage_1/group_std_mean": 0.20951978862285614,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027883827686309816,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027883827686309816,
"signal/frontier_coverage_10/centered_abs_mean": 0.155775585770607,
"signal/frontier_coverage_10/group_std_mean": 0.20951978862285614,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027883827686309816,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027883827686309816,
"signal/frontier_coverage_15/centered_abs_mean": 0.155775585770607,
"signal/frontier_coverage_15/group_std_mean": 0.20951978862285614,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027883827686309816,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027883827686309816,
"signal/frontier_coverage_20/centered_abs_mean": 0.09291831254959107,
"signal/frontier_coverage_20/group_std_mean": 0.12665492296218872,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016632377402856946,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016632377402856946,
"signal/frontier_coverage_25/centered_abs_mean": 0.06688660979270936,
"signal/frontier_coverage_25/group_std_mean": 0.08502381294965744,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011972703039646148,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011972703039646148,
"signal/frontier_coverage_5/centered_abs_mean": 0.155775585770607,
"signal/frontier_coverage_5/group_std_mean": 0.20951978862285614,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027883827686309816,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027883827686309816,
"signal/frontier_ece_reward/centered_abs_mean": 0.009020310081541538,
"signal/frontier_ece_reward/group_std_mean": 0.01170970220118761,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011275387601926922,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011275387601926922,
"step": 165
},
{
"calibration/aurc": 0.10870616500791086,
"calibration/batch_distribution_entropy": 0.7903104452510206,
"calibration/buffer_distribution_entropy": 0.8493497664534088,
"calibration/confidence_entropy": 0.39392810200666045,
"calibration/coverage@0%": 0.039693163843954195,
"calibration/coverage@1%": 0.15010983051062085,
"calibration/coverage@10%": 0.6249913825702296,
"calibration/coverage@15%": 0.7163854370584731,
"calibration/coverage@20%": 0.8282437601072432,
"calibration/coverage@25%": 0.9081739226033421,
"calibration/coverage@30%": 0.9605802000879506,
"calibration/coverage@5%": 0.4011407911307888,
"calibration/ece": 0.08378010703101495,
"calibration/mean_confidence": 0.7437848913758066,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008506944444444465,
"completions/max_length": 3674.0,
"completions/max_terminated_length": 3674.0,
"completions/mean_length": 838.0208374023438,
"completions/mean_terminated_length": 845.2309448242188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 239.8,
"epoch": 0.4079949000637492,
"grad_norm": 0.0004973475588485599,
"learning_rate": 1.1445783132530121e-06,
"loss": -0.0069,
"num_tokens": 368371908.0,
"reward": 1.0795949220657348,
"reward_std": 0.11285789757966995,
"rewards/accuracy_reward": 0.7223958373069763,
"rewards/brier_reward": 0.8292520999908447,
"rewards/confidence_uniqueness_reward": 0.9112018942832947,
"rewards/format_reward": 0.9914930462837219,
"rewards/frontier_aurc_reward": -0.001215806626714766,
"rewards/frontier_coverage_1": 0.028353986889123918,
"rewards/frontier_coverage_10": 0.028353986889123918,
"rewards/frontier_coverage_15": 0.028459986671805382,
"rewards/frontier_coverage_20": 0.033776380494236945,
"rewards/frontier_coverage_25": 0.0912104532122612,
"rewards/frontier_coverage_5": 0.028353986889123918,
"rewards/frontier_ece_reward": 0.006769264675676822,
"signal/accuracy_reward/centered_abs_mean": 0.1389865458011627,
"signal/accuracy_reward/group_std_mean": 0.1862527459859848,
"signal/accuracy_reward/group_zero_std_frac": 0.45555556416511533,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06949327290058135,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06949327290058135,
"signal/advantage_abs_mean": 0.08184980154037476,
"signal/advantage_pre_scale_abs_mean": 0.08184980154037476,
"signal/advantage_pre_scale_std": 0.15188938081264497,
"signal/advantage_std": 0.15188938081264497,
"signal/brier_reward/centered_abs_mean": 0.12720216065645218,
"signal/brier_reward/group_std_mean": 0.1649218052625656,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015900270082056522,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015900270082056522,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03929706513881683,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05663086473941803,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004912133142352104,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004912133142352104,
"signal/format_reward/centered_abs_mean": 0.014507378544658422,
"signal/format_reward/group_std_mean": 0.0268052663654089,
"signal/format_reward/group_zero_std_frac": 0.8916666865348816,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007253689272329211,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007253689272329211,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016262418590486049,
"signal/frontier_aurc_reward/group_std_mean": 0.0029089401010423898,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.910972725658212e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.910972725658212e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13412159085273742,
"signal/frontier_coverage_1/group_std_mean": 0.1826484888792038,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024007763247936966,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024007763247936966,
"signal/frontier_coverage_10/centered_abs_mean": 0.13412159085273742,
"signal/frontier_coverage_10/group_std_mean": 0.1826484888792038,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024007763247936966,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024007763247936966,
"signal/frontier_coverage_15/centered_abs_mean": 0.13297670781612397,
"signal/frontier_coverage_15/group_std_mean": 0.1811499148607254,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023802829906344413,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023802829906344413,
"signal/frontier_coverage_20/centered_abs_mean": 0.07466747760772705,
"signal/frontier_coverage_20/group_std_mean": 0.10277672857046127,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013365477789193392,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013365477789193392,
"signal/frontier_coverage_25/centered_abs_mean": 0.0674952432513237,
"signal/frontier_coverage_25/group_std_mean": 0.08588269799947738,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012081648223102094,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012081648223102094,
"signal/frontier_coverage_5/centered_abs_mean": 0.13412159085273742,
"signal/frontier_coverage_5/group_std_mean": 0.1826484888792038,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024007763247936966,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024007763247936966,
"signal/frontier_ece_reward/centered_abs_mean": 0.008575642108917236,
"signal/frontier_ece_reward/group_std_mean": 0.011006982997059822,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010719552636146545,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010719552636146545,
"step": 170
},
{
"calibration/aurc": 0.11067553128722692,
"calibration/batch_distribution_entropy": 0.8481618438645689,
"calibration/buffer_distribution_entropy": 0.8483553201550436,
"calibration/confidence_entropy": 0.40064739663776583,
"calibration/coverage@0%": 0.03877597911227154,
"calibration/coverage@1%": 0.1033262436625361,
"calibration/coverage@10%": 0.5340208553130162,
"calibration/coverage@15%": 0.7327274043747142,
"calibration/coverage@20%": 0.8668559271692431,
"calibration/coverage@25%": 0.9450903394255874,
"calibration/coverage@30%": 0.9826005221932114,
"calibration/coverage@5%": 0.30878133853807876,
"calibration/ece": 0.0929162421597211,
"calibration/mean_confidence": 0.6861792539951647,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00859375,
"completions/max_length": 3590.6,
"completions/max_terminated_length": 3590.6,
"completions/mean_length": 870.5375122070312,
"completions/mean_terminated_length": 878.083056640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 246.8,
"epoch": 0.4199947500656242,
"grad_norm": 0.00044883930240757763,
"learning_rate": 9.93975903614458e-07,
"loss": -0.0071,
"num_tokens": 381508468.0,
"reward": 1.0745736122131349,
"reward_std": 0.11894840151071548,
"rewards/accuracy_reward": 0.7121527671813965,
"rewards/brier_reward": 0.8242484927177429,
"rewards/confidence_uniqueness_reward": 0.9156982779502869,
"rewards/format_reward": 0.9913194417953491,
"rewards/frontier_aurc_reward": -0.0012379781110212207,
"rewards/frontier_coverage_1": 0.030451742745935918,
"rewards/frontier_coverage_10": 0.030451742745935918,
"rewards/frontier_coverage_15": 0.030320987850427628,
"rewards/frontier_coverage_20": 0.03499968759715557,
"rewards/frontier_coverage_25": 0.09922654330730438,
"rewards/frontier_coverage_5": 0.030451742745935918,
"rewards/frontier_ece_reward": 0.00628520967438817,
"signal/accuracy_reward/centered_abs_mean": 0.14856770932674407,
"signal/accuracy_reward/group_std_mean": 0.2042417496442795,
"signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07428385466337203,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07428385466337203,
"signal/advantage_abs_mean": 0.08372878432273864,
"signal/advantage_pre_scale_abs_mean": 0.08372878432273864,
"signal/advantage_pre_scale_std": 0.1532825142145157,
"signal/advantage_std": 0.1532825142145157,
"signal/brier_reward/centered_abs_mean": 0.12630099207162857,
"signal/brier_reward/group_std_mean": 0.16852413713932038,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01578762400895357,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01578762400895357,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037941998615860936,
"signal/confidence_uniqueness_reward/group_std_mean": 0.055057863146066664,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004742749826982617,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004742749826982617,
"signal/format_reward/centered_abs_mean": 0.014756944379769266,
"signal/format_reward/group_std_mean": 0.026993418857455254,
"signal/format_reward/group_zero_std_frac": 0.8916666626930236,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007378472189884633,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007378472189884633,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017084946855902671,
"signal/frontier_aurc_reward/group_std_mean": 0.0031451730988919734,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.058205293200444e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.058205293200444e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14232682287693024,
"signal/frontier_coverage_1/group_std_mean": 0.19747399687767028,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025476500391960143,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025476500391960143,
"signal/frontier_coverage_10/centered_abs_mean": 0.14232682287693024,
"signal/frontier_coverage_10/group_std_mean": 0.19747399687767028,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025476500391960143,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025476500391960143,
"signal/frontier_coverage_15/centered_abs_mean": 0.14066008031368255,
"signal/frontier_coverage_15/group_std_mean": 0.19527221620082855,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002517815353348851,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002517815353348851,
"signal/frontier_coverage_20/centered_abs_mean": 0.07579737156629562,
"signal/frontier_coverage_20/group_std_mean": 0.10598736554384232,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013567729154601693,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013567729154601693,
"signal/frontier_coverage_25/centered_abs_mean": 0.07291418462991714,
"signal/frontier_coverage_25/group_std_mean": 0.09304469972848892,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013051638146862389,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013051638146862389,
"signal/frontier_coverage_5/centered_abs_mean": 0.14232682287693024,
"signal/frontier_coverage_5/group_std_mean": 0.19747399687767028,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025476500391960143,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025476500391960143,
"signal/frontier_ece_reward/centered_abs_mean": 0.00866670086979866,
"signal/frontier_ece_reward/group_std_mean": 0.011408805288374424,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010833376087248324,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010833376087248324,
"step": 175
},
{
"calibration/aurc": 0.0961958541907562,
"calibration/batch_distribution_entropy": 0.8477025009746469,
"calibration/buffer_distribution_entropy": 0.8498138843428966,
"calibration/confidence_entropy": 0.4101348990305955,
"calibration/coverage@0%": 0.088780644337697,
"calibration/coverage@1%": 0.088780644337697,
"calibration/coverage@10%": 0.5502983060324453,
"calibration/coverage@15%": 0.807155930454841,
"calibration/coverage@20%": 0.916858509725483,
"calibration/coverage@25%": 0.9773861186549823,
"calibration/coverage@30%": 0.9979057591623036,
"calibration/coverage@5%": 0.355199305315759,
"calibration/ece": 0.08873742643457942,
"calibration/mean_confidence": 0.7072252329346721,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009548611111111138,
"completions/max_length": 3770.4,
"completions/max_terminated_length": 3770.4,
"completions/mean_length": 837.043408203125,
"completions/mean_terminated_length": 845.1139404296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 270.4,
"epoch": 0.4319946000674992,
"grad_norm": 0.00045111271901987493,
"learning_rate": 8.433734939759036e-07,
"loss": -0.0074,
"num_tokens": 394251176.0,
"reward": 1.0717910051345825,
"reward_std": 0.12240722179412841,
"rewards/accuracy_reward": 0.7144965171813965,
"rewards/brier_reward": 0.807321059703827,
"rewards/confidence_uniqueness_reward": 0.9133202791213989,
"rewards/format_reward": 0.9904513955116272,
"rewards/frontier_aurc_reward": -0.0018045842181891203,
"rewards/frontier_coverage_1": 0.016695484053343534,
"rewards/frontier_coverage_10": 0.016695484053343534,
"rewards/frontier_coverage_15": 0.017037773295305668,
"rewards/frontier_coverage_20": 0.028835199400782587,
"rewards/frontier_coverage_25": 0.10415665209293365,
"rewards/frontier_coverage_5": 0.016695484053343534,
"rewards/frontier_ece_reward": 0.00549684651196003,
"signal/accuracy_reward/centered_abs_mean": 0.15064561367034912,
"signal/accuracy_reward/group_std_mean": 0.2003714770078659,
"signal/accuracy_reward/group_zero_std_frac": 0.4194444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07532280683517456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07532280683517456,
"signal/advantage_abs_mean": 0.08792258501052856,
"signal/advantage_pre_scale_abs_mean": 0.08792258501052856,
"signal/advantage_pre_scale_std": 0.15989961624145507,
"signal/advantage_std": 0.15989961624145507,
"signal/brier_reward/centered_abs_mean": 0.13230671286582946,
"signal/brier_reward/group_std_mean": 0.17412598431110382,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016538339108228682,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016538339108228682,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040982935577631,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06092101261019707,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005122866947203875,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005122866947203875,
"signal/format_reward/centered_abs_mean": 0.01662326380610466,
"signal/format_reward/group_std_mean": 0.03184187039732933,
"signal/format_reward/group_zero_std_frac": 0.8666666626930237,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00831163190305233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00831163190305233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023748093051835896,
"signal/frontier_aurc_reward/group_std_mean": 0.004061697609722614,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2509083868935704e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2509083868935704e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13885007798671722,
"signal/frontier_coverage_1/group_std_mean": 0.18973132073879242,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024854163639247417,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024854163639247417,
"signal/frontier_coverage_10/centered_abs_mean": 0.13885007798671722,
"signal/frontier_coverage_10/group_std_mean": 0.18973132073879242,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024854163639247417,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024854163639247417,
"signal/frontier_coverage_15/centered_abs_mean": 0.13509701192378998,
"signal/frontier_coverage_15/group_std_mean": 0.18487387001514435,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002418236620724201,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002418236620724201,
"signal/frontier_coverage_20/centered_abs_mean": 0.06975524574518203,
"signal/frontier_coverage_20/group_std_mean": 0.095186148583889,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001248618890531361,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001248618890531361,
"signal/frontier_coverage_25/centered_abs_mean": 0.0811617761850357,
"signal/frontier_coverage_25/group_std_mean": 0.103651861846447,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014527957886457444,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014527957886457444,
"signal/frontier_coverage_5/centered_abs_mean": 0.13885007798671722,
"signal/frontier_coverage_5/group_std_mean": 0.18973132073879242,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024854163639247417,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024854163639247417,
"signal/frontier_ece_reward/centered_abs_mean": 0.008750239573419093,
"signal/frontier_ece_reward/group_std_mean": 0.011527445912361146,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010937799466773867,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010937799466773867,
"step": 180
},
{
"calibration/aurc": 0.18495376889754953,
"calibration/batch_distribution_entropy": 0.8803191163454155,
"calibration/buffer_distribution_entropy": 0.851482679855412,
"calibration/confidence_entropy": 0.4354021479497231,
"calibration/coverage@0%": 0.012587749815716776,
"calibration/coverage@1%": 0.012587749815716776,
"calibration/coverage@10%": 0.1918354407163473,
"calibration/coverage@15%": 0.36826092817701916,
"calibration/coverage@20%": 0.6374964095137642,
"calibration/coverage@25%": 0.8755843999038427,
"calibration/coverage@30%": 0.9279373368146213,
"calibration/coverage@5%": 0.06898461665644785,
"calibration/ece": 0.14425179951501588,
"calibration/mean_confidence": 0.6684984209096279,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009461805555555558,
"completions/max_length": 3679.8,
"completions/max_terminated_length": 3679.8,
"completions/mean_length": 818.1920043945313,
"completions/mean_terminated_length": 826.032080078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 217.8,
"epoch": 0.44399445006937416,
"grad_norm": 0.0004495714674703777,
"learning_rate": 6.927710843373495e-07,
"loss": -0.0085,
"num_tokens": 406766796.0,
"reward": 1.0636092185974122,
"reward_std": 0.12089930176734924,
"rewards/accuracy_reward": 0.6918402791023255,
"rewards/brier_reward": 0.8123436450958252,
"rewards/confidence_uniqueness_reward": 0.925143015384674,
"rewards/format_reward": 0.9905382037162781,
"rewards/frontier_aurc_reward": -0.0011275873170234264,
"rewards/frontier_coverage_1": 0.027901495201513173,
"rewards/frontier_coverage_10": 0.027901495201513173,
"rewards/frontier_coverage_15": 0.02855590097606182,
"rewards/frontier_coverage_20": 0.0363032516092062,
"rewards/frontier_coverage_25": 0.1104502335190773,
"rewards/frontier_coverage_5": 0.027901495201513173,
"rewards/frontier_ece_reward": 0.00494370711967349,
"signal/accuracy_reward/centered_abs_mean": 0.15333116352558135,
"signal/accuracy_reward/group_std_mean": 0.20526001155376433,
"signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07666558176279067,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07666558176279067,
"signal/advantage_abs_mean": 0.08671480715274811,
"signal/advantage_pre_scale_abs_mean": 0.08671480715274811,
"signal/advantage_pre_scale_std": 0.15482214391231536,
"signal/advantage_std": 0.15482214391231536,
"signal/brier_reward/centered_abs_mean": 0.1261175572872162,
"signal/brier_reward/group_std_mean": 0.164833265542984,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015764694660902023,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015764694660902023,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03562588319182396,
"signal/confidence_uniqueness_reward/group_std_mean": 0.055293154716491696,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004453235398977995,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004453235398977995,
"signal/format_reward/centered_abs_mean": 0.016520182229578496,
"signal/format_reward/group_std_mean": 0.032335417345166206,
"signal/format_reward/group_zero_std_frac": 0.8638888835906983,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008260091114789248,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008260091114789248,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013634230359457432,
"signal/frontier_aurc_reward/group_std_mean": 0.0024055395508185027,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4405272415606306e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4405272415606306e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15479598343372344,
"signal/frontier_coverage_1/group_std_mean": 0.20758814811706544,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002770848013460636,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002770848013460636,
"signal/frontier_coverage_10/centered_abs_mean": 0.15479598343372344,
"signal/frontier_coverage_10/group_std_mean": 0.20758814811706544,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002770848013460636,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002770848013460636,
"signal/frontier_coverage_15/centered_abs_mean": 0.14861891269683838,
"signal/frontier_coverage_15/group_std_mean": 0.19950321912765503,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002660278417170048,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002660278417170048,
"signal/frontier_coverage_20/centered_abs_mean": 0.07016772180795669,
"signal/frontier_coverage_20/group_std_mean": 0.09428980499505997,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012560022063553334,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012560022063553334,
"signal/frontier_coverage_25/centered_abs_mean": 0.07735366076231003,
"signal/frontier_coverage_25/group_std_mean": 0.09949797540903091,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013846305664628744,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013846305664628744,
"signal/frontier_coverage_5/centered_abs_mean": 0.15479598343372344,
"signal/frontier_coverage_5/group_std_mean": 0.20758814811706544,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002770848013460636,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002770848013460636,
"signal/frontier_ece_reward/centered_abs_mean": 0.008560269139707088,
"signal/frontier_ece_reward/group_std_mean": 0.011409120261669159,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001070033642463386,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001070033642463386,
"step": 185
},
{
"calibration/aurc": 0.15022940629833742,
"calibration/batch_distribution_entropy": 0.8461052918312866,
"calibration/buffer_distribution_entropy": 0.8533362490457753,
"calibration/confidence_entropy": 0.44849927036648723,
"calibration/coverage@0%": 0.021875,
"calibration/coverage@1%": 0.021875,
"calibration/coverage@10%": 0.4367375159578275,
"calibration/coverage@15%": 0.576325098459507,
"calibration/coverage@20%": 0.7084946935173707,
"calibration/coverage@25%": 0.9614501740644037,
"calibration/coverage@30%": 0.9932291666666668,
"calibration/coverage@5%": 0.14235813765516317,
"calibration/ece": 0.1212236417116653,
"calibration/mean_confidence": 0.6864637556987978,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006249999999999978,
"completions/max_length": 3390.8,
"completions/max_terminated_length": 3390.8,
"completions/mean_length": 811.7388916015625,
"completions/mean_terminated_length": 816.8151123046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 219.6,
"epoch": 0.45599430007124914,
"grad_norm": 0.0004526027769315988,
"learning_rate": 5.421686746987952e-07,
"loss": -0.0038,
"num_tokens": 419200972.0,
"reward": 1.086756706237793,
"reward_std": 0.11896635293960571,
"rewards/accuracy_reward": 0.7328124880790711,
"rewards/brier_reward": 0.8254269242286683,
"rewards/confidence_uniqueness_reward": 0.92621408700943,
"rewards/format_reward": 0.99375,
"rewards/frontier_aurc_reward": -0.0012867568526417016,
"rewards/frontier_coverage_1": 0.01327955424785614,
"rewards/frontier_coverage_10": 0.01327955424785614,
"rewards/frontier_coverage_15": 0.01672282423824072,
"rewards/frontier_coverage_20": 0.0356328509747982,
"rewards/frontier_coverage_25": 0.13131238967180253,
"rewards/frontier_coverage_5": 0.01327955424785614,
"rewards/frontier_ece_reward": 0.004340594261884689,
"signal/accuracy_reward/centered_abs_mean": 0.15990668535232544,
"signal/accuracy_reward/group_std_mean": 0.21131123900413512,
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07995334267616272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07995334267616272,
"signal/advantage_abs_mean": 0.08612867295742035,
"signal/advantage_pre_scale_abs_mean": 0.08612867295742035,
"signal/advantage_pre_scale_std": 0.15164274871349334,
"signal/advantage_std": 0.15164274871349334,
"signal/brier_reward/centered_abs_mean": 0.11984222829341888,
"signal/brier_reward/group_std_mean": 0.15931495130062104,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01498027853667736,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01498027853667736,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0317846491932869,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04874978512525559,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003973081149160862,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003973081149160862,
"signal/format_reward/centered_abs_mean": 0.011197916697710752,
"signal/format_reward/group_std_mean": 0.023881056532263755,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005598958348855376,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005598958348855376,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016745397355407477,
"signal/frontier_aurc_reward/group_std_mean": 0.003021475113928318,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.997425981448032e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.997425981448032e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1465451642870903,
"signal/frontier_coverage_1/group_std_mean": 0.19608235359191895,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002623158413916826,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002623158413916826,
"signal/frontier_coverage_10/centered_abs_mean": 0.1465451642870903,
"signal/frontier_coverage_10/group_std_mean": 0.19608235359191895,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002623158413916826,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002623158413916826,
"signal/frontier_coverage_15/centered_abs_mean": 0.13391998708248137,
"signal/frontier_coverage_15/group_std_mean": 0.1800734966993332,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002397167752496898,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002397167752496898,
"signal/frontier_coverage_20/centered_abs_mean": 0.062118491530418395,
"signal/frontier_coverage_20/group_std_mean": 0.08400460481643676,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011119209812022746,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011119209812022746,
"signal/frontier_coverage_25/centered_abs_mean": 0.08528402894735336,
"signal/frontier_coverage_25/group_std_mean": 0.11039264798164368,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015265840804204345,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015265840804204345,
"signal/frontier_coverage_5/centered_abs_mean": 0.1465451642870903,
"signal/frontier_coverage_5/group_std_mean": 0.19608235359191895,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002623158413916826,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002623158413916826,
"signal/frontier_ece_reward/centered_abs_mean": 0.00753614604473114,
"signal/frontier_ece_reward/group_std_mean": 0.010234573669731618,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009420182555913925,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009420182555913925,
"step": 190
},
{
"calibration/aurc": 0.14937174985204743,
"calibration/batch_distribution_entropy": 0.8768565334857193,
"calibration/buffer_distribution_entropy": 0.8564208478703119,
"calibration/confidence_entropy": 0.4413662764250832,
"calibration/coverage@0%": 0.041348916887709995,
"calibration/coverage@1%": 0.08378923519009726,
"calibration/coverage@10%": 0.40936488333469506,
"calibration/coverage@15%": 0.5909852478554963,
"calibration/coverage@20%": 0.6877917325599496,
"calibration/coverage@25%": 0.794470757533946,
"calibration/coverage@30%": 0.9077784145987675,
"calibration/coverage@5%": 0.21624305560365026,
"calibration/ece": 0.1298338782403467,
"calibration/mean_confidence": 0.6607892197608017,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011111111111111117,
"completions/max_length": 3528.2,
"completions/max_terminated_length": 3528.2,
"completions/mean_length": 832.9720458984375,
"completions/mean_terminated_length": 842.4790161132812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.2,
"epoch": 0.46799415007312406,
"grad_norm": 0.00047915452159941196,
"learning_rate": 3.91566265060241e-07,
"loss": -0.0095,
"num_tokens": 431877674.0,
"reward": 1.0597479343414307,
"reward_std": 0.12377008944749832,
"rewards/accuracy_reward": 0.6863715291023255,
"rewards/brier_reward": 0.8096499562263488,
"rewards/confidence_uniqueness_reward": 0.9205044031143188,
"rewards/format_reward": 0.9888020873069763,
"rewards/frontier_aurc_reward": -0.001665916945785284,
"rewards/frontier_coverage_1": 0.029197129979729654,
"rewards/frontier_coverage_10": 0.029197129979729654,
"rewards/frontier_coverage_15": 0.029610903933644295,
"rewards/frontier_coverage_20": 0.04197726622223854,
"rewards/frontier_coverage_25": 0.1404498651623726,
"rewards/frontier_coverage_5": 0.029197129979729654,
"rewards/frontier_ece_reward": 0.004466280713677407,
"signal/accuracy_reward/centered_abs_mean": 0.1545193150639534,
"signal/accuracy_reward/group_std_mean": 0.2042325258255005,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0772596575319767,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0772596575319767,
"signal/advantage_abs_mean": 0.09082913100719452,
"signal/advantage_pre_scale_abs_mean": 0.09082913100719452,
"signal/advantage_pre_scale_std": 0.16084725856781007,
"signal/advantage_std": 0.16084725856781007,
"signal/brier_reward/centered_abs_mean": 0.12776512503623963,
"signal/brier_reward/group_std_mean": 0.16529574990272522,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015970640629529954,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015970640629529954,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03753339871764183,
"signal/confidence_uniqueness_reward/group_std_mean": 0.056541355699300765,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004691674839705229,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004691674839705229,
"signal/format_reward/centered_abs_mean": 0.017822265438735486,
"signal/format_reward/group_std_mean": 0.03260133340954781,
"signal/format_reward/group_zero_std_frac": 0.8694444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008911132719367743,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008911132719367743,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00225935832131654,
"signal/frontier_aurc_reward/group_std_mean": 0.004012473439797759,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.044251254526898e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.044251254526898e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1396041989326477,
"signal/frontier_coverage_1/group_std_mean": 0.18966357111930848,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024989150697365403,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024989150697365403,
"signal/frontier_coverage_10/centered_abs_mean": 0.1396041989326477,
"signal/frontier_coverage_10/group_std_mean": 0.18966357111930848,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024989150697365403,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024989150697365403,
"signal/frontier_coverage_15/centered_abs_mean": 0.11660263985395432,
"signal/frontier_coverage_15/group_std_mean": 0.1599918618798256,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002087187208235264,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002087187208235264,
"signal/frontier_coverage_20/centered_abs_mean": 0.060534077882766726,
"signal/frontier_coverage_20/group_std_mean": 0.08167311102151871,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010835599503479898,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010835599503479898,
"signal/frontier_coverage_25/centered_abs_mean": 0.0967460110783577,
"signal/frontier_coverage_25/group_std_mean": 0.12314206212759018,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017317535821348429,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017317535821348429,
"signal/frontier_coverage_5/centered_abs_mean": 0.1396041989326477,
"signal/frontier_coverage_5/group_std_mean": 0.18966357111930848,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024989150697365403,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024989150697365403,
"signal/frontier_ece_reward/centered_abs_mean": 0.007368762884289027,
"signal/frontier_ece_reward/group_std_mean": 0.010059486515820027,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009210953605361283,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009210953605361283,
"step": 195
},
{
"calibration/aurc": 0.1517635493969927,
"calibration/batch_distribution_entropy": 0.8032683764135182,
"calibration/buffer_distribution_entropy": 0.8594278407178504,
"calibration/confidence_entropy": 0.4228073073041322,
"calibration/coverage@0%": 0.05823443307656014,
"calibration/coverage@1%": 0.05823443307656014,
"calibration/coverage@10%": 0.4063801377610362,
"calibration/coverage@15%": 0.6229884169428966,
"calibration/coverage@20%": 0.6913565883420245,
"calibration/coverage@25%": 0.8279600708198709,
"calibration/coverage@30%": 0.941512982494816,
"calibration/coverage@5%": 0.15745867617066028,
"calibration/ece": 0.12641913652799427,
"calibration/mean_confidence": 0.7471995840116807,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006770833333333326,
"completions/max_length": 3694.8,
"completions/max_terminated_length": 3694.8,
"completions/mean_length": 800.4954956054687,
"completions/mean_terminated_length": 805.9992065429688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 240.0,
"epoch": 0.47999400007499904,
"grad_norm": 0.0005283089121803641,
"learning_rate": 2.409638554216868e-07,
"loss": -0.0058,
"num_tokens": 444167190.0,
"reward": 1.0729887247085572,
"reward_std": 0.11522095501422883,
"rewards/accuracy_reward": 0.7047742962837219,
"rewards/brier_reward": 0.8240418195724487,
"rewards/confidence_uniqueness_reward": 0.9152790069580078,
"rewards/format_reward": 0.9932291626930236,
"rewards/frontier_aurc_reward": -0.0018710391130298376,
"rewards/frontier_coverage_1": 0.030061314441263677,
"rewards/frontier_coverage_10": 0.030090765841305257,
"rewards/frontier_coverage_15": 0.030872286297380924,
"rewards/frontier_coverage_20": 0.04789535701274872,
"rewards/frontier_coverage_25": 0.1694835215806961,
"rewards/frontier_coverage_5": 0.030061314441263677,
"rewards/frontier_ece_reward": 0.004374683182686567,
"signal/accuracy_reward/centered_abs_mean": 0.1409125432372093,
"signal/accuracy_reward/group_std_mean": 0.18542096316814421,
"signal/accuracy_reward/group_zero_std_frac": 0.47222223281860354,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07045627161860465,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07045627161860465,
"signal/advantage_abs_mean": 0.08473498374223709,
"signal/advantage_pre_scale_abs_mean": 0.08473498374223709,
"signal/advantage_pre_scale_std": 0.1543968439102173,
"signal/advantage_std": 0.1543968439102173,
"signal/brier_reward/centered_abs_mean": 0.11732118874788285,
"signal/brier_reward/group_std_mean": 0.15466432571411132,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014665148593485356,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014665148593485356,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.035984426736831665,
"signal/confidence_uniqueness_reward/group_std_mean": 0.054205088317394255,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004498053342103958,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004498053342103958,
"signal/format_reward/centered_abs_mean": 0.012174479011446238,
"signal/format_reward/group_std_mean": 0.02487517409026623,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006087239505723119,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006087239505723119,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023178313160315154,
"signal/frontier_aurc_reward/group_std_mean": 0.004119249107316136,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.148917651036754e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.148917651036754e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.10867589861154556,
"signal/frontier_coverage_1/group_std_mean": 0.15358475148677825,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019452984910458327,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019452984910458327,
"signal/frontier_coverage_10/centered_abs_mean": 0.10854685455560684,
"signal/frontier_coverage_10/group_std_mean": 0.15342499017715455,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019429885549470782,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019429885549470782,
"signal/frontier_coverage_15/centered_abs_mean": 0.08157736957073211,
"signal/frontier_coverage_15/group_std_mean": 0.11773104816675187,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014602348441258074,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014602348441258074,
"signal/frontier_coverage_20/centered_abs_mean": 0.052034994959831236,
"signal/frontier_coverage_20/group_std_mean": 0.07054910808801651,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000931426405441016,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000931426405441016,
"signal/frontier_coverage_25/centered_abs_mean": 0.10885821878910065,
"signal/frontier_coverage_25/group_std_mean": 0.13929919749498368,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001948562078177929,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001948562078177929,
"signal/frontier_coverage_5/centered_abs_mean": 0.10867589861154556,
"signal/frontier_coverage_5/group_std_mean": 0.15358475148677825,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019452984910458327,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019452984910458327,
"signal/frontier_ece_reward/centered_abs_mean": 0.006094504240900278,
"signal/frontier_ece_reward/group_std_mean": 0.00841012941673398,
"signal/frontier_ece_reward/group_zero_std_frac": 0.008333333395421505,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007618130301125347,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007618130301125347,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_calibration/aurc": 0.20361683297711194,
"eval_calibration/batch_distribution_entropy": 0.7910456665612545,
"eval_calibration/buffer_distribution_entropy": 0.8607564757055215,
"eval_calibration/confidence_entropy": 0.40042139560153805,
"eval_calibration/coverage@0%": 0.1213037634408602,
"eval_calibration/coverage@1%": 0.1213037634408602,
"eval_calibration/coverage@10%": 0.1743951612903226,
"eval_calibration/coverage@15%": 0.46135752688172044,
"eval_calibration/coverage@20%": 0.7064852150537634,
"eval_calibration/coverage@25%": 0.8635752688172044,
"eval_calibration/coverage@30%": 0.9474126344086021,
"eval_calibration/coverage@5%": 0.1213037634408602,
"eval_calibration/ece": 0.17280676072345136,
"eval_calibration/mean_confidence": 0.7212002863951991,
"eval_completions/clipped_ratio": 0.005208333333333352,
"eval_completions/max_length": 2564.1666666666665,
"eval_completions/max_terminated_length": 2564.1666666666665,
"eval_completions/mean_length": 822.8699951171875,
"eval_completions/mean_terminated_length": 827.223876953125,
"eval_completions/min_length": 89.33333333333333,
"eval_completions/min_terminated_length": 267.0,
"eval_loss": 0.0,
"eval_num_tokens": 444167190.0,
"eval_reward": 1.059295157591502,
"eval_reward_std": 0.2571010912458102,
"eval_rewards/accuracy_reward": 0.6935763955116272,
"eval_rewards/brier_reward": 0.8161595165729523,
"eval_rewards/confidence_uniqueness_reward": 0.8582899471124014,
"eval_rewards/format_reward": 0.9921875099341074,
"eval_rewards/frontier_aurc_reward": -0.0019197222621490557,
"eval_rewards/frontier_coverage_1": 0.0351586788892746,
"eval_rewards/frontier_coverage_10": 0.035206587674717106,
"eval_rewards/frontier_coverage_15": 0.03441356122493744,
"eval_rewards/frontier_coverage_20": 0.052705912540356316,
"eval_rewards/frontier_coverage_25": 0.17523299405972162,
"eval_rewards/frontier_coverage_5": 0.0351586788892746,
"eval_rewards/frontier_ece_reward": 0.004450828962338467,
"eval_runtime": 198.5455,
"eval_samples_per_second": 5.037,
"eval_signal/accuracy_reward/centered_abs_mean": 0.41162109375,
"eval_signal/accuracy_reward/group_std_mean": 0.4593026836713155,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.205810546875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.205810546875,
"eval_signal/advantage_abs_mean": 0.22099632769823074,
"eval_signal/advantage_pre_scale_abs_mean": 0.22099632769823074,
"eval_signal/advantage_pre_scale_std": 0.2554586206873258,
"eval_signal/advantage_std": 0.2554586206873258,
"eval_signal/brier_reward/centered_abs_mean": 0.21053502460320792,
"eval_signal/brier_reward/group_std_mean": 0.2717415342728297,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02631687807540099,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02631687807540099,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06546468411882718,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0933909999827544,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008183085514853397,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008183085514853397,
"eval_signal/format_reward/centered_abs_mean": 0.015136718594779571,
"eval_signal/format_reward/group_std_mean": 0.04419417337824901,
"eval_signal/format_reward/group_zero_std_frac": 0.750000019868215,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.007568359297389786,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359297389786,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0032751309336163104,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007060678792186081,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.862484310152164e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.862484310152164e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.1731132542093595,
"eval_signal/frontier_coverage_1/group_std_mean": 0.2936793069044749,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030987270874902606,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030987270874902606,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.17259238163630167,
"eval_signal/frontier_coverage_10/group_std_mean": 0.2929122944672902,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003089403461975356,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003089403461975356,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.11876154070099194,
"eval_signal/frontier_coverage_15/group_std_mean": 0.212093619008859,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002125831456699719,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002125831456699719,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.07741224020719528,
"eval_signal/frontier_coverage_20/group_std_mean": 0.10848981390396754,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001385679099864016,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001385679099864016,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.22898491968711218,
"eval_signal/frontier_coverage_25/group_std_mean": 0.27611127495765686,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0040988298909117775,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0040988298909117775,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.1731132542093595,
"eval_signal/frontier_coverage_5/group_std_mean": 0.2936793069044749,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030987270874902606,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030987270874902606,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.008298173546791077,
"eval_signal/frontier_ece_reward/group_std_mean": 0.013287559927751621,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010372716933488846,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010372716933488846,
"eval_steps_per_second": 0.03,
"step": 200
},
{
"calibration/aurc": 0.15245076106115,
"calibration/batch_distribution_entropy": 0.7577490647981495,
"calibration/buffer_distribution_entropy": 0.8615814010175041,
"calibration/confidence_entropy": 0.370367604405363,
"calibration/coverage@0%": 0.00835509138381201,
"calibration/coverage@1%": 0.00835509138381201,
"calibration/coverage@10%": 0.2857309478647174,
"calibration/coverage@15%": 0.7050908432970091,
"calibration/coverage@20%": 0.8035156789611826,
"calibration/coverage@25%": 0.8801878712529355,
"calibration/coverage@30%": 0.961588617212322,
"calibration/coverage@5%": 0.09116869360084283,
"calibration/ece": 0.12256849527723586,
"calibration/mean_confidence": 0.7609970584329514,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00659722222222221,
"completions/max_length": 3460.2,
"completions/max_terminated_length": 3460.2,
"completions/mean_length": 824.7370727539062,
"completions/mean_terminated_length": 830.19814453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.4,
"epoch": 0.491993850076874,
"grad_norm": 0.000395325681893155,
"learning_rate": 9.036144578313253e-08,
"loss": -0.0053,
"num_tokens": 456734113.0,
"reward": 1.1002012491226196,
"reward_std": 0.11294516026973725,
"rewards/accuracy_reward": 0.7552083373069763,
"rewards/brier_reward": 0.8410086750984191,
"rewards/confidence_uniqueness_reward": 0.9115934729576111,
"rewards/format_reward": 0.9934027910232544,
"rewards/frontier_aurc_reward": -0.0013322666753083467,
"rewards/frontier_coverage_1": 0.01617803443223238,
"rewards/frontier_coverage_10": 0.016358466073870658,
"rewards/frontier_coverage_15": 0.022982559585943817,
"rewards/frontier_coverage_20": 0.05930071547627449,
"rewards/frontier_coverage_25": 0.22371198534965514,
"rewards/frontier_coverage_5": 0.01617803443223238,
"rewards/frontier_ece_reward": 0.003959867171943188,
"signal/accuracy_reward/centered_abs_mean": 0.1392578125,
"signal/accuracy_reward/group_std_mean": 0.1875597894191742,
"signal/accuracy_reward/group_zero_std_frac": 0.44722222685813906,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06962890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06962890625,
"signal/advantage_abs_mean": 0.07913011610507965,
"signal/advantage_pre_scale_abs_mean": 0.07913011610507965,
"signal/advantage_pre_scale_std": 0.15015988945960998,
"signal/advantage_std": 0.15015988945960998,
"signal/brier_reward/centered_abs_mean": 0.11419829726219177,
"signal/brier_reward/group_std_mean": 0.1516292631626129,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014274787157773972,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014274787157773972,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03720296248793602,
"signal/confidence_uniqueness_reward/group_std_mean": 0.058379728347063065,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004650370310992002,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004650370310992002,
"signal/format_reward/centered_abs_mean": 0.012358940858393907,
"signal/format_reward/group_std_mean": 0.028991687297821044,
"signal/format_reward/group_zero_std_frac": 0.8611111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006179470429196953,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006179470429196953,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001682829950004816,
"signal/frontier_aurc_reward/group_std_mean": 0.0030279669910669325,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.012265406141523e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.012265406141523e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11965394765138626,
"signal/frontier_coverage_1/group_std_mean": 0.16320410072803498,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021418056450784205,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021418056450784205,
"signal/frontier_coverage_10/centered_abs_mean": 0.11909585893154144,
"signal/frontier_coverage_10/group_std_mean": 0.16247594058513642,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002131815906614065,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002131815906614065,
"signal/frontier_coverage_15/centered_abs_mean": 0.08062577843666077,
"signal/frontier_coverage_15/group_std_mean": 0.11165858805179596,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014432014198973776,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014432014198973776,
"signal/frontier_coverage_20/centered_abs_mean": 0.05757641866803169,
"signal/frontier_coverage_20/group_std_mean": 0.07493609786033631,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010306178824976086,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010306178824976086,
"signal/frontier_coverage_25/centered_abs_mean": 0.1128468781709671,
"signal/frontier_coverage_25/group_std_mean": 0.1470080941915512,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020199591061100365,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020199591061100365,
"signal/frontier_coverage_5/centered_abs_mean": 0.11965394765138626,
"signal/frontier_coverage_5/group_std_mean": 0.16320410072803498,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021418056450784205,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021418056450784205,
"signal/frontier_ece_reward/centered_abs_mean": 0.006321498658508062,
"signal/frontier_ece_reward/group_std_mean": 0.008466892875730991,
"signal/frontier_ece_reward/group_zero_std_frac": 0.008333333395421505,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007901873323135078,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007901873323135078,
"step": 205
},
{
"calibration/aurc": 0.09685634602876492,
"calibration/batch_distribution_entropy": 0.7791143614888557,
"calibration/buffer_distribution_entropy": 0.8611139279672693,
"calibration/confidence_entropy": 0.38429371665391043,
"calibration/coverage@0%": 0.022687609075043632,
"calibration/coverage@1%": 0.022687609075043632,
"calibration/coverage@10%": 0.618178636717365,
"calibration/coverage@15%": 0.781235178457132,
"calibration/coverage@20%": 0.8722135499408289,
"calibration/coverage@25%": 0.9379210469362288,
"calibration/coverage@30%": 0.981675392670157,
"calibration/coverage@5%": 0.44410983252519953,
"calibration/ece": 0.0883028476897068,
"calibration/mean_confidence": 0.7646092393666405,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005353009259259263,
"completions/max_length": 3607.0,
"completions/max_terminated_length": 3607.0,
"completions/mean_length": 825.1263020833334,
"completions/mean_terminated_length": 829.6256917317709,
"completions/min_length": 0.0,
"completions/min_terminated_length": 193.33333333333334,
"epoch": 0.49919376007799904,
"num_tokens": 464303434.0,
"reward": 1.0698885917663574,
"reward_std": 0.1222114438811938,
"rewards/accuracy_reward": 0.6950231591860453,
"rewards/brier_reward": 0.8242372075716654,
"rewards/confidence_uniqueness_reward": 0.9139418800671896,
"rewards/format_reward": 0.9945023059844971,
"rewards/frontier_aurc_reward": -0.001452996317918102,
"rewards/frontier_coverage_1": 0.03662515555818876,
"rewards/frontier_coverage_10": 0.03657694533467293,
"rewards/frontier_coverage_15": 0.03316311786572138,
"rewards/frontier_coverage_20": 0.06067184483011564,
"rewards/frontier_coverage_25": 0.20710508028666177,
"rewards/frontier_coverage_5": 0.03662515555818876,
"rewards/frontier_ece_reward": 0.00421436270698905,
"signal/accuracy_reward/centered_abs_mean": 0.15033637235562006,
"signal/accuracy_reward/group_std_mean": 0.2045428305864334,
"signal/accuracy_reward/group_zero_std_frac": 0.40740742286046344,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07516818617781003,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07516818617781003,
"signal/advantage_abs_mean": 0.08676933993895848,
"signal/advantage_pre_scale_abs_mean": 0.08676933993895848,
"signal/advantage_pre_scale_std": 0.15460805098215738,
"signal/advantage_std": 0.15460805098215738,
"signal/brier_reward/centered_abs_mean": 0.125543013215065,
"signal/brier_reward/group_std_mean": 0.16625908513863882,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015692876651883125,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015692876651883125,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03518642236789068,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05552714318037033,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004398302795986335,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004398302795986335,
"signal/format_reward/centered_abs_mean": 0.0103804978231589,
"signal/format_reward/group_std_mean": 0.026168825725714367,
"signal/format_reward/group_zero_std_frac": 0.8657407363255819,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00519024891157945,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00519024891157945,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019372629079346855,
"signal/frontier_aurc_reward/group_std_mean": 0.0035577377614875636,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.46770048054168e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.46770048054168e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12492906053860982,
"signal/frontier_coverage_1/group_std_mean": 0.17234125236670175,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022362301436563334,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022362301436563334,
"signal/frontier_coverage_10/centered_abs_mean": 0.12409953524669011,
"signal/frontier_coverage_10/group_std_mean": 0.17126783728599548,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002221381369357308,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002221381369357308,
"signal/frontier_coverage_15/centered_abs_mean": 0.07909848541021347,
"signal/frontier_coverage_15/group_std_mean": 0.11092762400706609,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00141586281824857,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00141586281824857,
"signal/frontier_coverage_20/centered_abs_mean": 0.05819156641761462,
"signal/frontier_coverage_20/group_std_mean": 0.0763649841149648,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010416289248193304,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010416289248193304,
"signal/frontier_coverage_25/centered_abs_mean": 0.12411411106586456,
"signal/frontier_coverage_25/group_std_mean": 0.16261087854703268,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002221642527729273,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002221642527729273,
"signal/frontier_coverage_5/centered_abs_mean": 0.12492906053860982,
"signal/frontier_coverage_5/group_std_mean": 0.17234125236670175,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022362301436563334,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022362301436563334,
"signal/frontier_ece_reward/centered_abs_mean": 0.006716146133840084,
"signal/frontier_ece_reward/group_std_mean": 0.009008504450321198,
"signal/frontier_ece_reward/group_zero_std_frac": 0.004629629664123058,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008395182667300105,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008395182667300105,
"step": 208,
"total_flos": 0.0,
"train_loss": -0.01026152794320996,
"train_runtime": 40759.4999,
"train_samples_per_second": 0.368,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 208,
"num_input_tokens_seen": 464303434,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}