{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6351994038421116, "calibration/batch_distribution_entropy": 0.6512784692126155, "calibration/confidence_entropy": 0.3468661812035868, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.49083744047999345, "calibration/mean_confidence": 0.7901167725714044, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03408203125, "completions/max_length": 1502.8, "completions/max_terminated_length": 1502.8, "completions/mean_length": 215.82197265625, "completions/mean_terminated_length": 223.429248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.029902346432209015, "learning_rate": 3.1249999999999997e-07, "loss": 0.01, "num_tokens": 17054049.0, "reward": 0.5691495656967163, "reward_std": 0.4165258049964905, "rewards/accuracy_reward": 0.22001953125, "rewards/brier_reward": 0.3759719550609589, "rewards/confidence_uniqueness_reward": 0.4875619649887085, "rewards/format_reward": 0.6849609375, "rewards/frontier_coverage_0": 0.30275666117668154, "rewards/frontier_coverage_1": 0.30275666117668154, "rewards/frontier_coverage_10": 0.30275666117668154, "rewards/frontier_coverage_15": 0.30275666117668154, "rewards/frontier_coverage_20": 0.30275666117668154, "rewards/frontier_coverage_25": 0.30275666117668154, "rewards/frontier_coverage_5": 0.30275666117668154, "signal/accuracy_reward/centered_abs_mean": 0.239569091796875, "signal/accuracy_reward/group_std_mean": 0.28268457651138307, "signal/accuracy_reward/group_zero_std_frac": 0.309375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1197845458984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1197845458984375, "signal/advantage_abs_mean": 0.354982727766037, "signal/advantage_pre_scale_abs_mean": 0.354982727766037, "signal/advantage_pre_scale_std": 0.4236880660057068, "signal/advantage_std": 0.4236880660057068, "signal/brier_reward/centered_abs_mean": 0.32025502920150756, "signal/brier_reward/group_std_mean": 0.3653526544570923, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03202550373971462, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03202550373971462, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2980002284049988, "signal/confidence_uniqueness_reward/group_std_mean": 0.34901362657546997, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029800022765994073, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029800022765994073, "signal/format_reward/centered_abs_mean": 0.40311279296875, "signal/format_reward/group_std_mean": 0.45344988703727723, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.201556396484375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.201556396484375, "signal/frontier_coverage_0/centered_abs_mean": 0.2926347076892853, "signal/frontier_coverage_0/group_std_mean": 0.34393285512924193, "signal/frontier_coverage_0/group_zero_std_frac": 0.003125, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_1/centered_abs_mean": 0.2926347076892853, "signal/frontier_coverage_1/group_std_mean": 0.34393285512924193, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_10/centered_abs_mean": 0.2926347076892853, "signal/frontier_coverage_10/group_std_mean": 0.34393285512924193, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_15/centered_abs_mean": 0.2926347076892853, "signal/frontier_coverage_15/group_std_mean": 0.34393285512924193, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_20/centered_abs_mean": 0.2926347076892853, "signal/frontier_coverage_20/group_std_mean": 0.34393285512924193, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_25/centered_abs_mean": 0.2926347076892853, "signal/frontier_coverage_25/group_std_mean": 0.34393285512924193, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_5/centered_abs_mean": 0.2926347076892853, "signal/frontier_coverage_5/group_std_mean": 0.34393285512924193, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004184676380828023, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004184676380828023, "step": 5 }, { "calibration/aurc": 0.6689473392497769, "calibration/batch_distribution_entropy": 0.6514151099566186, "calibration/confidence_entropy": 0.34427881956673384, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5258273923359426, "calibration/mean_confidence": 0.79101776183208, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03828125, "completions/max_length": 1505.8, "completions/max_terminated_length": 1505.8, "completions/mean_length": 204.794140625, "completions/mean_terminated_length": 212.9551544189453, "completions/min_length": 0.0, "completions/min_terminated_length": 1.8, "epoch": 0.032, "grad_norm": 0.03670521453022957, "learning_rate": 6.249999999999999e-07, "loss": 0.0038, "num_tokens": 34251493.0, "reward": 0.5770156979560852, "reward_std": 0.39822320342063905, "rewards/accuracy_reward": 0.2083984375, "rewards/brier_reward": 0.37617892026901245, "rewards/confidence_uniqueness_reward": 0.5085799217224121, "rewards/format_reward": 0.708984375, "rewards/frontier_coverage_0": 0.298185932636261, "rewards/frontier_coverage_1": 0.298185932636261, "rewards/frontier_coverage_10": 0.298185932636261, "rewards/frontier_coverage_15": 0.298185932636261, "rewards/frontier_coverage_20": 0.298185932636261, "rewards/frontier_coverage_25": 0.298185932636261, "rewards/frontier_coverage_5": 0.298185932636261, "signal/accuracy_reward/centered_abs_mean": 0.22000732421875, "signal/accuracy_reward/group_std_mean": 0.2672633767127991, "signal/accuracy_reward/group_zero_std_frac": 0.325, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.110003662109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.110003662109375, "signal/advantage_abs_mean": 0.3318171322345734, "signal/advantage_pre_scale_abs_mean": 0.3318171322345734, "signal/advantage_pre_scale_std": 0.40630478858947755, "signal/advantage_std": 0.40630478858947755, "signal/brier_reward/centered_abs_mean": 0.3074793994426727, "signal/brier_reward/group_std_mean": 0.3563279390335083, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030747941136360167, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030747941136360167, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2858774721622467, "signal/confidence_uniqueness_reward/group_std_mean": 0.3415905833244324, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02858774848282337, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02858774848282337, "signal/format_reward/centered_abs_mean": 0.38082275390625, "signal/format_reward/group_std_mean": 0.43958239555358886, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.190411376953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.190411376953125, "signal/frontier_coverage_0/centered_abs_mean": 0.28009108304977415, "signal/frontier_coverage_0/group_std_mean": 0.33603209257125854, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_1/centered_abs_mean": 0.28009108304977415, "signal/frontier_coverage_1/group_std_mean": 0.33603209257125854, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_10/centered_abs_mean": 0.28009108304977415, "signal/frontier_coverage_10/group_std_mean": 0.33603209257125854, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_15/centered_abs_mean": 0.28009108304977415, "signal/frontier_coverage_15/group_std_mean": 0.33603209257125854, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_20/centered_abs_mean": 0.28009108304977415, "signal/frontier_coverage_20/group_std_mean": 0.33603209257125854, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_25/centered_abs_mean": 0.28009108304977415, "signal/frontier_coverage_25/group_std_mean": 0.33603209257125854, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_5/centered_abs_mean": 0.28009108304977415, "signal/frontier_coverage_5/group_std_mean": 0.33603209257125854, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004005302442237735, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004005302442237735, "step": 10 }, { "calibration/aurc": 0.6155595065201233, "calibration/batch_distribution_entropy": 0.6427073391342403, "calibration/buffer_distribution_entropy": 0.6650037228066683, "calibration/confidence_entropy": 0.34433009595302805, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.48542370452858075, "calibration/mean_confidence": 0.801199768518267, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0216796875, "completions/max_length": 1493.4, "completions/max_terminated_length": 1493.4, "completions/mean_length": 177.4697265625, "completions/mean_terminated_length": 181.56385803222656, "completions/min_length": 0.0, "completions/min_terminated_length": 2.2, "epoch": 0.048, "grad_norm": 0.02985437400639057, "learning_rate": 9.374999999999999e-07, "loss": 0.0052, "num_tokens": 51117519.0, "reward": 0.7041961193084717, "reward_std": 0.31099514067173006, "rewards/accuracy_reward": 0.260546875, "rewards/brier_reward": 0.47122411131858827, "rewards/confidence_uniqueness_reward": 0.6331815242767334, "rewards/format_reward": 0.866015625, "rewards/frontier_coverage_0": 0.30443855822086335, "rewards/frontier_coverage_1": 0.30443855822086335, "rewards/frontier_coverage_10": 0.30443855822086335, "rewards/frontier_coverage_15": 0.30443855822086335, "rewards/frontier_coverage_20": 0.30443855822086335, "rewards/frontier_coverage_25": 0.30443855822086335, "rewards/frontier_coverage_5": 0.30443855822086335, "signal/accuracy_reward/centered_abs_mean": 0.20245361328125, "signal/accuracy_reward/group_std_mean": 0.25178754329681396, "signal/accuracy_reward/group_zero_std_frac": 0.34375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.101226806640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.101226806640625, "signal/advantage_abs_mean": 0.23251062631607056, "signal/advantage_pre_scale_abs_mean": 0.23251062631607056, "signal/advantage_pre_scale_std": 0.32085421681404114, "signal/advantage_std": 0.32085421681404114, "signal/brier_reward/centered_abs_mean": 0.27578999400138854, "signal/brier_reward/group_std_mean": 0.32952038645744325, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02757900021970272, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02757900021970272, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.20353608727455139, "signal/confidence_uniqueness_reward/group_std_mean": 0.2688037037849426, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020353609696030617, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020353609696030617, "signal/format_reward/centered_abs_mean": 0.2126708984375, "signal/format_reward/group_std_mean": 0.30878249406814573, "signal/format_reward/group_zero_std_frac": 0.071875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10633544921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.10633544921875, "signal/frontier_coverage_0/centered_abs_mean": 0.23975261449813842, "signal/frontier_coverage_0/group_std_mean": 0.29435974061489106, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_1/centered_abs_mean": 0.23975261449813842, "signal/frontier_coverage_1/group_std_mean": 0.29435974061489106, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_10/centered_abs_mean": 0.23975261449813842, "signal/frontier_coverage_10/group_std_mean": 0.29435974061489106, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_15/centered_abs_mean": 0.23975261449813842, "signal/frontier_coverage_15/group_std_mean": 0.29435974061489106, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_20/centered_abs_mean": 0.23975261449813842, "signal/frontier_coverage_20/group_std_mean": 0.29435974061489106, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_25/centered_abs_mean": 0.23975261449813842, "signal/frontier_coverage_25/group_std_mean": 0.29435974061489106, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_5/centered_abs_mean": 0.23975261449813842, "signal/frontier_coverage_5/group_std_mean": 0.29435974061489106, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034284623805433513, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034284623805433513, "step": 15 }, { "calibration/aurc": 0.5353270245215466, "calibration/batch_distribution_entropy": 0.6883228156852644, "calibration/buffer_distribution_entropy": 0.6604163855659319, "calibration/confidence_entropy": 0.3737941450349532, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3867639720734177, "calibration/mean_confidence": 0.7860741133838542, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00537109375, "completions/max_length": 1372.8, "completions/max_terminated_length": 1372.8, "completions/mean_length": 131.06396484375, "completions/mean_terminated_length": 131.7907257080078, "completions/min_length": 0.0, "completions/min_terminated_length": 19.8, "epoch": 0.064, "grad_norm": 0.011706759221851826, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 67378014.0, "reward": 0.796250331401825, "reward_std": 0.1896502822637558, "rewards/accuracy_reward": 0.3400390625, "rewards/brier_reward": 0.5660670876502991, "rewards/confidence_uniqueness_reward": 0.7469664692878724, "rewards/format_reward": 0.97880859375, "rewards/frontier_coverage_0": 0.055176225304603574, "rewards/frontier_coverage_1": 0.055176225304603574, "rewards/frontier_coverage_10": 0.055176225304603574, "rewards/frontier_coverage_15": 0.055176225304603574, "rewards/frontier_coverage_20": 0.055176225304603574, "rewards/frontier_coverage_25": 0.055176225304603574, "rewards/frontier_coverage_5": 0.055176225304603574, "signal/accuracy_reward/centered_abs_mean": 0.20543212890625, "signal/accuracy_reward/group_std_mean": 0.2560299515724182, "signal/accuracy_reward/group_zero_std_frac": 0.3375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.102716064453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.102716064453125, "signal/advantage_abs_mean": 0.14002106040716172, "signal/advantage_pre_scale_abs_mean": 0.14002106040716172, "signal/advantage_pre_scale_std": 0.20781327784061432, "signal/advantage_std": 0.20781327784061432, "signal/brier_reward/centered_abs_mean": 0.23954716920852662, "signal/brier_reward/group_std_mean": 0.2948504090309143, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023954717069864274, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023954717069864274, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12043386697769165, "signal/confidence_uniqueness_reward/group_std_mean": 0.1569172501564026, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012043387070298195, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012043387070298195, "signal/format_reward/centered_abs_mean": 0.039337158203125, "signal/format_reward/group_std_mean": 0.09293515011668205, "signal/format_reward/group_zero_std_frac": 0.546875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0196685791015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0196685791015625, "signal/frontier_coverage_0/centered_abs_mean": 0.098221555352211, "signal/frontier_coverage_0/group_std_mean": 0.1575959414243698, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_1/centered_abs_mean": 0.098221555352211, "signal/frontier_coverage_1/group_std_mean": 0.1575959414243698, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_10/centered_abs_mean": 0.098221555352211, "signal/frontier_coverage_10/group_std_mean": 0.1575959414243698, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_15/centered_abs_mean": 0.098221555352211, "signal/frontier_coverage_15/group_std_mean": 0.1575959414243698, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_20/centered_abs_mean": 0.098221555352211, "signal/frontier_coverage_20/group_std_mean": 0.1575959414243698, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_25/centered_abs_mean": 0.098221555352211, "signal/frontier_coverage_25/group_std_mean": 0.1575959414243698, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_5/centered_abs_mean": 0.098221555352211, "signal/frontier_coverage_5/group_std_mean": 0.1575959414243698, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014045683201402426, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014045683201402426, "step": 20 }, { "calibration/aurc": 0.6381643594481621, "calibration/batch_distribution_entropy": 0.7804464216854378, "calibration/buffer_distribution_entropy": 0.6838159309570526, "calibration/confidence_entropy": 0.4490663154010008, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4193330378505376, "calibration/mean_confidence": 0.7320443286313054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 711.4, "completions/max_terminated_length": 711.4, "completions/mean_length": 108.21484375, "completions/mean_terminated_length": 108.34236907958984, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.08, "grad_norm": 0.07899007946252823, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 83419286.0, "reward": 0.8220680236816407, "reward_std": 0.1564656525850296, "rewards/accuracy_reward": 0.356640625, "rewards/brier_reward": 0.6099278688430786, "rewards/confidence_uniqueness_reward": 0.8057548403739929, "rewards/format_reward": 0.99306640625, "rewards/frontier_coverage_0": 0.05640597715973854, "rewards/frontier_coverage_1": 0.05640597715973854, "rewards/frontier_coverage_10": 0.05640597715973854, "rewards/frontier_coverage_15": 0.05640597715973854, "rewards/frontier_coverage_20": 0.05640597715973854, "rewards/frontier_coverage_25": 0.05640597715973854, "rewards/frontier_coverage_5": 0.05640597715973854, "signal/accuracy_reward/centered_abs_mean": 0.189013671875, "signal/accuracy_reward/group_std_mean": 0.23597990572452546, "signal/accuracy_reward/group_zero_std_frac": 0.384375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0945068359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0945068359375, "signal/advantage_abs_mean": 0.11983990371227264, "signal/advantage_pre_scale_abs_mean": 0.11983990371227264, "signal/advantage_pre_scale_std": 0.1789884090423584, "signal/advantage_std": 0.1789884090423584, "signal/brier_reward/centered_abs_mean": 0.2191626399755478, "signal/brier_reward/group_std_mean": 0.2707302927970886, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021916263923048972, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021916263923048972, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07767567187547683, "signal/confidence_uniqueness_reward/group_std_mean": 0.10581078231334687, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007767567411065102, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007767567411065102, "signal/format_reward/centered_abs_mean": 0.013055419921875, "signal/format_reward/group_std_mean": 0.0338589858263731, "signal/format_reward/group_zero_std_frac": 0.821875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0065277099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0065277099609375, "signal/frontier_coverage_0/centered_abs_mean": 0.11382188200950623, "signal/frontier_coverage_0/group_std_mean": 0.17087749242782593, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_1/centered_abs_mean": 0.11382188200950623, "signal/frontier_coverage_1/group_std_mean": 0.17087749242782593, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_10/centered_abs_mean": 0.11382188200950623, "signal/frontier_coverage_10/group_std_mean": 0.17087749242782593, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_15/centered_abs_mean": 0.11382188200950623, "signal/frontier_coverage_15/group_std_mean": 0.17087749242782593, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_20/centered_abs_mean": 0.11382188200950623, "signal/frontier_coverage_20/group_std_mean": 0.17087749242782593, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_25/centered_abs_mean": 0.11382188200950623, "signal/frontier_coverage_25/group_std_mean": 0.17087749242782593, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_5/centered_abs_mean": 0.11382188200950623, "signal/frontier_coverage_5/group_std_mean": 0.17087749242782593, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016276529058814049, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016276529058814049, "step": 25 }, { "calibration/aurc": 0.6310570415406989, "calibration/batch_distribution_entropy": 0.8396163083060891, "calibration/buffer_distribution_entropy": 0.72589944855154, "calibration/confidence_entropy": 0.5324667987144341, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3369986010486203, "calibration/mean_confidence": 0.6545671946414907, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 550.4, "completions/max_terminated_length": 550.4, "completions/mean_length": 109.701171875, "completions/mean_terminated_length": 109.80919189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 37.6, "epoch": 0.096, "grad_norm": 0.004247599747031927, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 99587234.0, "reward": 0.8369772076606751, "reward_std": 0.1397940844297409, "rewards/accuracy_reward": 0.36689453125, "rewards/brier_reward": 0.6584429621696473, "rewards/confidence_uniqueness_reward": 0.8270732045173645, "rewards/format_reward": 0.996875, "rewards/frontier_coverage_0": 0.06534303873777389, "rewards/frontier_coverage_1": 0.06534303873777389, "rewards/frontier_coverage_10": 0.06534303873777389, "rewards/frontier_coverage_15": 0.06534303873777389, "rewards/frontier_coverage_20": 0.06534303873777389, "rewards/frontier_coverage_25": 0.06534303873777389, "rewards/frontier_coverage_5": 0.06534303873777389, "signal/accuracy_reward/centered_abs_mean": 0.176531982421875, "signal/accuracy_reward/group_std_mean": 0.2280410945415497, "signal/accuracy_reward/group_zero_std_frac": 0.371875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0882659912109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0882659912109375, "signal/advantage_abs_mean": 0.10705235004425048, "signal/advantage_pre_scale_abs_mean": 0.10705235004425048, "signal/advantage_pre_scale_std": 0.16180138289928436, "signal/advantage_std": 0.16180138289928436, "signal/brier_reward/centered_abs_mean": 0.19467200934886933, "signal/brier_reward/group_std_mean": 0.2417706161737442, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019467201083898544, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019467201083898544, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07996234148740769, "signal/confidence_uniqueness_reward/group_std_mean": 0.10100700855255126, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007996234111487865, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007996234111487865, "signal/format_reward/centered_abs_mean": 0.00604248046875, "signal/format_reward/group_std_mean": 0.017341360449790955, "signal/format_reward/group_zero_std_frac": 0.903125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003021240234375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003021240234375, "signal/frontier_coverage_0/centered_abs_mean": 0.14193402826786042, "signal/frontier_coverage_0/group_std_mean": 0.20070194005966185, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_1/centered_abs_mean": 0.14193402826786042, "signal/frontier_coverage_1/group_std_mean": 0.20070194005966185, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_10/centered_abs_mean": 0.14193402826786042, "signal/frontier_coverage_10/group_std_mean": 0.20070194005966185, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_15/centered_abs_mean": 0.14193402826786042, "signal/frontier_coverage_15/group_std_mean": 0.20070194005966185, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_20/centered_abs_mean": 0.14193402826786042, "signal/frontier_coverage_20/group_std_mean": 0.20070194005966185, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_25/centered_abs_mean": 0.14193402826786042, "signal/frontier_coverage_25/group_std_mean": 0.20070194005966185, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_5/centered_abs_mean": 0.14193402826786042, "signal/frontier_coverage_5/group_std_mean": 0.20070194005966185, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020296565489843488, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020296565489843488, "step": 30 }, { "calibration/aurc": 0.4946849277525329, "calibration/batch_distribution_entropy": 0.8813726157552029, "calibration/buffer_distribution_entropy": 0.770196496343847, "calibration/confidence_entropy": 0.5614673006037132, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.03561643835616438, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2029153969087596, "calibration/mean_confidence": 0.5835205516314776, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 381.4, "completions/max_terminated_length": 381.4, "completions/mean_length": 117.57978515625, "completions/mean_terminated_length": 117.64874572753907, "completions/min_length": 16.8, "completions/min_terminated_length": 44.4, "epoch": 0.112, "grad_norm": 0.0023746925871819258, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 115900723.0, "reward": 0.867804765701294, "reward_std": 0.12527745664119722, "rewards/accuracy_reward": 0.41201171875, "rewards/brier_reward": 0.711748468875885, "rewards/confidence_uniqueness_reward": 0.8443502902984619, "rewards/format_reward": 0.998046875, "rewards/frontier_coverage_0": 0.07158430591225624, "rewards/frontier_coverage_1": 0.07158430591225624, "rewards/frontier_coverage_10": 0.07158430591225624, "rewards/frontier_coverage_15": 0.07158430591225624, "rewards/frontier_coverage_20": 0.07158430591225624, "rewards/frontier_coverage_25": 0.07158430591225624, "rewards/frontier_coverage_5": 0.07158430591225624, "signal/accuracy_reward/centered_abs_mean": 0.166949462890625, "signal/accuracy_reward/group_std_mean": 0.21374925673007966, "signal/accuracy_reward/group_zero_std_frac": 0.415625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0834747314453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0834747314453125, "signal/advantage_abs_mean": 0.09721089154481888, "signal/advantage_pre_scale_abs_mean": 0.09721089154481888, "signal/advantage_pre_scale_std": 0.14640960693359376, "signal/advantage_std": 0.14640960693359376, "signal/brier_reward/centered_abs_mean": 0.17608677446842194, "signal/brier_reward/group_std_mean": 0.2203920841217041, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017608677595853807, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017608677595853807, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08555223494768142, "signal/confidence_uniqueness_reward/group_std_mean": 0.10618715584278107, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008555223420262336, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008555223420262336, "signal/format_reward/centered_abs_mean": 0.00377197265625, "signal/format_reward/group_std_mean": 0.010712234629318118, "signal/format_reward/group_zero_std_frac": 0.940625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001885986328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001885986328125, "signal/frontier_coverage_0/centered_abs_mean": 0.18098629117012024, "signal/frontier_coverage_0/group_std_mean": 0.23790799379348754, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_1/centered_abs_mean": 0.18098629117012024, "signal/frontier_coverage_1/group_std_mean": 0.23790799379348754, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_10/centered_abs_mean": 0.18098629117012024, "signal/frontier_coverage_10/group_std_mean": 0.23790799379348754, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_15/centered_abs_mean": 0.18098629117012024, "signal/frontier_coverage_15/group_std_mean": 0.23790799379348754, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_20/centered_abs_mean": 0.18098629117012024, "signal/frontier_coverage_20/group_std_mean": 0.23790799379348754, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_25/centered_abs_mean": 0.18098629117012024, "signal/frontier_coverage_25/group_std_mean": 0.23790799379348754, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_5/centered_abs_mean": 0.18098629117012024, "signal/frontier_coverage_5/group_std_mean": 0.23790799379348754, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002588103944435716, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002588103944435716, "step": 35 }, { "calibration/aurc": 0.5268772797001111, "calibration/batch_distribution_entropy": 0.8795172673167941, "calibration/buffer_distribution_entropy": 0.8186280886906422, "calibration/confidence_entropy": 0.5856780630341514, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.001171875, "calibration/coverage@15%": 0.001171875, "calibration/coverage@20%": 0.001171875, "calibration/coverage@25%": 0.01252140410958904, "calibration/coverage@30%": 0.03831564946183953, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.14752713597429187, "calibration/mean_confidence": 0.46553835813256955, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 673.8, "completions/max_terminated_length": 673.8, "completions/mean_length": 127.0328125, "completions/mean_terminated_length": 127.10689544677734, "completions/min_length": 23.2, "completions/min_terminated_length": 54.2, "epoch": 0.128, "grad_norm": 0.0012905292678624392, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 132118211.0, "reward": 0.8764559626579285, "reward_std": 0.10876117348670959, "rewards/accuracy_reward": 0.41298828125, "rewards/brier_reward": 0.7420265793800354, "rewards/confidence_uniqueness_reward": 0.8668764352798461, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.09599030762910843, "rewards/frontier_coverage_1": 0.09599030762910843, "rewards/frontier_coverage_10": 0.09599030762910843, "rewards/frontier_coverage_15": 0.09599030762910843, "rewards/frontier_coverage_20": 0.09599030762910843, "rewards/frontier_coverage_25": 0.09599030762910843, "rewards/frontier_coverage_5": 0.09599030762910843, "signal/accuracy_reward/centered_abs_mean": 0.150067138671875, "signal/accuracy_reward/group_std_mean": 0.19439001083374025, "signal/accuracy_reward/group_zero_std_frac": 0.45625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0750335693359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0750335693359375, "signal/advantage_abs_mean": 0.08436928540468216, "signal/advantage_pre_scale_abs_mean": 0.08436928540468216, "signal/advantage_pre_scale_std": 0.12733531445264817, "signal/advantage_std": 0.12733531445264817, "signal/brier_reward/centered_abs_mean": 0.16247932612895966, "signal/brier_reward/group_std_mean": 0.20427174270153045, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016247932985424995, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016247932985424995, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0709274247288704, "signal/confidence_uniqueness_reward/group_std_mean": 0.08821647614240646, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070927425287663935, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070927425287663935, "signal/format_reward/centered_abs_mean": 0.002081298828125, "signal/format_reward/group_std_mean": 0.006076698657125235, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625, "signal/frontier_coverage_0/centered_abs_mean": 0.21489879190921785, "signal/frontier_coverage_0/group_std_mean": 0.27094546556472776, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_1/centered_abs_mean": 0.21489879190921785, "signal/frontier_coverage_1/group_std_mean": 0.27094546556472776, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_10/centered_abs_mean": 0.21489879190921785, "signal/frontier_coverage_10/group_std_mean": 0.27094546556472776, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_15/centered_abs_mean": 0.21489879190921785, "signal/frontier_coverage_15/group_std_mean": 0.27094546556472776, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_20/centered_abs_mean": 0.21489879190921785, "signal/frontier_coverage_20/group_std_mean": 0.27094546556472776, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_25/centered_abs_mean": 0.21489879190921785, "signal/frontier_coverage_25/group_std_mean": 0.27094546556472776, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_5/centered_abs_mean": 0.21489879190921785, "signal/frontier_coverage_5/group_std_mean": 0.27094546556472776, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030730527359992266, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030730527359992266, "step": 40 }, { "calibration/aurc": 0.3662413115512146, "calibration/batch_distribution_entropy": 0.8725469191055767, "calibration/buffer_distribution_entropy": 0.8648538408069202, "calibration/confidence_entropy": 0.5608525866357722, "calibration/coverage@0%": 0.0007820144324853229, "calibration/coverage@1%": 0.0007820144324853229, "calibration/coverage@10%": 0.014089255136986301, "calibration/coverage@15%": 0.08962741560665362, "calibration/coverage@20%": 0.14284491193737767, "calibration/coverage@25%": 0.2508294092465753, "calibration/coverage@30%": 0.2805558953033268, "calibration/coverage@5%": 0.0007820144324853229, "calibration/ece": 0.2382096857898642, "calibration/mean_confidence": 0.38276720949307774, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 535.2, "completions/max_terminated_length": 535.2, "completions/mean_length": 134.48076171875, "completions/mean_terminated_length": 134.49421081542968, "completions/min_length": 42.6, "completions/min_terminated_length": 53.6, "epoch": 0.144, "grad_norm": 0.0015246145194396377, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 148445726.0, "reward": 0.9145649552345276, "reward_std": 0.0982263907790184, "rewards/accuracy_reward": 0.5052734375, "rewards/brier_reward": 0.7265760660171509, "rewards/confidence_uniqueness_reward": 0.8610557317733765, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.03405772633850575, "rewards/frontier_coverage_1": 0.03405772633850575, "rewards/frontier_coverage_10": 0.03405772633850575, "rewards/frontier_coverage_15": 0.03405772633850575, "rewards/frontier_coverage_20": 0.03405772633850575, "rewards/frontier_coverage_25": 0.03405772633850575, "rewards/frontier_coverage_5": 0.03405772633850575, "signal/accuracy_reward/centered_abs_mean": 0.14317626953125, "signal/accuracy_reward/group_std_mean": 0.192362380027771, "signal/accuracy_reward/group_zero_std_frac": 0.446875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071588134765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.071588134765625, "signal/advantage_abs_mean": 0.07465749233961105, "signal/advantage_pre_scale_abs_mean": 0.07465749233961105, "signal/advantage_pre_scale_std": 0.1136060506105423, "signal/advantage_std": 0.1136060506105423, "signal/brier_reward/centered_abs_mean": 0.1587459623813629, "signal/brier_reward/group_std_mean": 0.1997540056705475, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015874596685171126, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015874596685171126, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08940582275390625, "signal/confidence_uniqueness_reward/group_std_mean": 0.1131935566663742, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008940582629293203, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008940582629293203, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_coverage_0/centered_abs_mean": 0.23958866000175477, "signal/frontier_coverage_0/group_std_mean": 0.30230913162231443, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_1/centered_abs_mean": 0.23958866000175477, "signal/frontier_coverage_1/group_std_mean": 0.30230913162231443, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_10/centered_abs_mean": 0.23958866000175477, "signal/frontier_coverage_10/group_std_mean": 0.30230913162231443, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_15/centered_abs_mean": 0.23958866000175477, "signal/frontier_coverage_15/group_std_mean": 0.30230913162231443, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_20/centered_abs_mean": 0.23958866000175477, "signal/frontier_coverage_20/group_std_mean": 0.30230913162231443, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_25/centered_abs_mean": 0.23958866000175477, "signal/frontier_coverage_25/group_std_mean": 0.30230913162231443, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_5/centered_abs_mean": 0.23958866000175477, "signal/frontier_coverage_5/group_std_mean": 0.30230913162231443, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003426117729395628, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003426117729395628, "step": 45 }, { "calibration/aurc": 0.454046793538352, "calibration/batch_distribution_entropy": 0.8398905686717141, "calibration/buffer_distribution_entropy": 0.901052292739411, "calibration/confidence_entropy": 0.545608272478401, "calibration/coverage@0%": 0.0019546538649706457, "calibration/coverage@1%": 0.0019546538649706457, "calibration/coverage@10%": 0.009376528864970646, "calibration/coverage@15%": 0.012892153864970645, "calibration/coverage@20%": 0.016017153864970646, "calibration/coverage@25%": 0.017189028864970646, "calibration/coverage@30%": 0.03593902886497065, "calibration/coverage@5%": 0.0019546538649706457, "calibration/ece": 0.1533710485313065, "calibration/mean_confidence": 0.3372224861869217, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 423.2, "completions/max_terminated_length": 423.2, "completions/mean_length": 141.1029296875, "completions/mean_terminated_length": 141.13076171875, "completions/min_length": 36.0, "completions/min_terminated_length": 59.6, "epoch": 0.16, "grad_norm": 0.0012460550060495734, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 164911548.0, "reward": 0.8956733822822571, "reward_std": 0.09705854654312134, "rewards/accuracy_reward": 0.45341796875, "rewards/brier_reward": 0.7379406452178955, "rewards/confidence_uniqueness_reward": 0.8653342247009277, "rewards/format_reward": 0.999609375, "rewards/frontier_coverage_0": 0.08823383674025535, "rewards/frontier_coverage_1": 0.08823383674025535, "rewards/frontier_coverage_10": 0.08823383674025535, "rewards/frontier_coverage_15": 0.08823383674025535, "rewards/frontier_coverage_20": 0.08823383674025535, "rewards/frontier_coverage_25": 0.08823383674025535, "rewards/frontier_coverage_5": 0.08823383674025535, "signal/accuracy_reward/centered_abs_mean": 0.150860595703125, "signal/accuracy_reward/group_std_mean": 0.19206807315349578, "signal/accuracy_reward/group_zero_std_frac": 0.475, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0754302978515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0754302978515625, "signal/advantage_abs_mean": 0.07680515646934509, "signal/advantage_pre_scale_abs_mean": 0.07680515646934509, "signal/advantage_pre_scale_std": 0.11379086673259735, "signal/advantage_std": 0.11379086673259735, "signal/brier_reward/centered_abs_mean": 0.15479380786418914, "signal/brier_reward/group_std_mean": 0.1973109394311905, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015479381382465362, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015479381382465362, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09476796388626099, "signal/confidence_uniqueness_reward/group_std_mean": 0.11929279714822769, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009476796537637711, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009476796537637711, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_coverage_0/centered_abs_mean": 0.2523395955562592, "signal/frontier_coverage_0/group_std_mean": 0.31533271074295044, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_1/centered_abs_mean": 0.2523395955562592, "signal/frontier_coverage_1/group_std_mean": 0.31533271074295044, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_10/centered_abs_mean": 0.2523395955562592, "signal/frontier_coverage_10/group_std_mean": 0.31533271074295044, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_15/centered_abs_mean": 0.2523395955562592, "signal/frontier_coverage_15/group_std_mean": 0.31533271074295044, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_20/centered_abs_mean": 0.2523395955562592, "signal/frontier_coverage_20/group_std_mean": 0.31533271074295044, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_25/centered_abs_mean": 0.2523395955562592, "signal/frontier_coverage_25/group_std_mean": 0.31533271074295044, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_5/centered_abs_mean": 0.2523395955562592, "signal/frontier_coverage_5/group_std_mean": 0.31533271074295044, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036084561608731745, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036084561608731745, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.5985907180934281, "eval_calibration/batch_distribution_entropy": 0.7980443905263197, "eval_calibration/buffer_distribution_entropy": 0.9172991735258191, "eval_calibration/confidence_entropy": 0.5452766110964884, "eval_calibration/coverage@0%": 0.015625, "eval_calibration/coverage@1%": 0.015625, "eval_calibration/coverage@10%": 0.015625, "eval_calibration/coverage@15%": 0.015625, "eval_calibration/coverage@20%": 0.0546875, "eval_calibration/coverage@25%": 0.078125, "eval_calibration/coverage@30%": 0.0859375, "eval_calibration/coverage@5%": 0.015625, "eval_calibration/ece": 0.20335937500000004, "eval_calibration/mean_confidence": 0.359296875, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 284.75, "eval_completions/max_terminated_length": 284.75, "eval_completions/mean_length": 144.8631477355957, "eval_completions/mean_terminated_length": 144.8631477355957, "eval_completions/min_length": 74.0, "eval_completions/min_terminated_length": 74.0, "eval_loss": 0.0, "eval_num_tokens": 164911548.0, "eval_reward": 0.8594861328601837, "eval_reward_std": 0.1926819011569023, "eval_rewards/accuracy_reward": 0.373046875, "eval_rewards/brier_reward": 0.7505200058221817, "eval_rewards/confidence_uniqueness_reward": 0.833251953125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.1457089427858591, "eval_rewards/frontier_coverage_1": 0.1457089427858591, "eval_rewards/frontier_coverage_10": 0.1457089427858591, "eval_rewards/frontier_coverage_15": 0.1457089427858591, "eval_rewards/frontier_coverage_20": 0.1457089427858591, "eval_rewards/frontier_coverage_25": 0.1457089427858591, "eval_rewards/frontier_coverage_5": 0.1457089427858591, "eval_runtime": 15.9676, "eval_samples_per_second": 31.313, "eval_signal/accuracy_reward/centered_abs_mean": 0.4466552734375, "eval_signal/accuracy_reward/group_std_mean": 0.4786013886332512, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22332763671875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22332763671875, "eval_signal/advantage_abs_mean": 0.17472263425588608, "eval_signal/advantage_pre_scale_abs_mean": 0.17472263425588608, "eval_signal/advantage_pre_scale_std": 0.19090154394507408, "eval_signal/advantage_std": 0.19090154394507408, "eval_signal/brier_reward/centered_abs_mean": 0.19750789552927017, "eval_signal/brier_reward/group_std_mean": 0.2354220263659954, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019750789739191532, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019750789739191532, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0953521728515625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1108260452747345, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009535217541269958, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009535217541269958, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.4024868533015251, "eval_signal/frontier_coverage_0/group_std_mean": 0.473217248916626, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.4024868533015251, "eval_signal/frontier_coverage_1/group_std_mean": 0.473217248916626, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.4024868533015251, "eval_signal/frontier_coverage_10/group_std_mean": 0.473217248916626, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.4024868533015251, "eval_signal/frontier_coverage_15/group_std_mean": 0.473217248916626, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.4024868533015251, "eval_signal/frontier_coverage_20/group_std_mean": 0.473217248916626, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.4024868533015251, "eval_signal/frontier_coverage_25/group_std_mean": 0.473217248916626, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.4024868533015251, "eval_signal/frontier_coverage_5/group_std_mean": 0.473217248916626, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0057555618695914745, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0057555618695914745, "eval_steps_per_second": 0.251, "step": 50 }, { "calibration/aurc": 0.4537260570762408, "calibration/batch_distribution_entropy": 0.9062964846595729, "calibration/buffer_distribution_entropy": 0.924649477340763, "calibration/confidence_entropy": 0.542944820963166, "calibration/coverage@0%": 0.0015625, "calibration/coverage@1%": 0.0015625, "calibration/coverage@10%": 0.0015625, "calibration/coverage@15%": 0.0015625, "calibration/coverage@20%": 0.0078125, "calibration/coverage@25%": 0.022265625, "calibration/coverage@30%": 0.115234375, "calibration/coverage@5%": 0.0015625, "calibration/ece": 0.18426962719962278, "calibration/mean_confidence": 0.3850698915774798, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 452.8, "completions/max_terminated_length": 452.8, "completions/mean_length": 149.6189453125, "completions/mean_terminated_length": 149.6783416748047, "completions/min_length": 39.6, "completions/min_terminated_length": 66.0, "epoch": 0.176, "grad_norm": 0.0017046101856976748, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 181680766.0, "reward": 0.8976470112800599, "reward_std": 0.09877448678016662, "rewards/accuracy_reward": 0.44765625, "rewards/brier_reward": 0.7434515833854676, "rewards/confidence_uniqueness_reward": 0.893541157245636, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.10353371798992157, "rewards/frontier_coverage_1": 0.10353371798992157, "rewards/frontier_coverage_10": 0.10353371798992157, "rewards/frontier_coverage_15": 0.10353371798992157, "rewards/frontier_coverage_20": 0.10353371798992157, "rewards/frontier_coverage_25": 0.10353371798992157, "rewards/frontier_coverage_5": 0.10353371798992157, "signal/accuracy_reward/centered_abs_mean": 0.15311279296875, "signal/accuracy_reward/group_std_mean": 0.19436658024787903, "signal/accuracy_reward/group_zero_std_frac": 0.475, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.076556396484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.076556396484375, "signal/advantage_abs_mean": 0.07793679982423782, "signal/advantage_pre_scale_abs_mean": 0.07793679982423782, "signal/advantage_pre_scale_std": 0.11506912857294083, "signal/advantage_std": 0.11506912857294083, "signal/brier_reward/centered_abs_mean": 0.16262381374835969, "signal/brier_reward/group_std_mean": 0.20505278408527375, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016262382455170154, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016262382455170154, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06747991964221, "signal/confidence_uniqueness_reward/group_std_mean": 0.08725375980138779, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006747992150485516, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006747992150485516, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_coverage_0/centered_abs_mean": 0.25853300988674166, "signal/frontier_coverage_0/group_std_mean": 0.320653623342514, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_1/centered_abs_mean": 0.25853300988674166, "signal/frontier_coverage_1/group_std_mean": 0.320653623342514, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_10/centered_abs_mean": 0.25853300988674166, "signal/frontier_coverage_10/group_std_mean": 0.320653623342514, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_15/centered_abs_mean": 0.25853300988674166, "signal/frontier_coverage_15/group_std_mean": 0.320653623342514, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_20/centered_abs_mean": 0.25853300988674166, "signal/frontier_coverage_20/group_std_mean": 0.320653623342514, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_25/centered_abs_mean": 0.25853300988674166, "signal/frontier_coverage_25/group_std_mean": 0.320653623342514, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_5/centered_abs_mean": 0.25853300988674166, "signal/frontier_coverage_5/group_std_mean": 0.320653623342514, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003697022097185254, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003697022097185254, "step": 55 }, { "calibration/aurc": 0.3573644592066947, "calibration/batch_distribution_entropy": 0.9555272460757454, "calibration/buffer_distribution_entropy": 0.9379707462670741, "calibration/confidence_entropy": 0.5269331213271102, "calibration/coverage@0%": 0.0015640288649706457, "calibration/coverage@1%": 0.0015640288649706457, "calibration/coverage@10%": 0.010565985812133073, "calibration/coverage@15%": 0.01721960616438356, "calibration/coverage@20%": 0.10710387108610568, "calibration/coverage@25%": 0.18689151174168295, "calibration/coverage@30%": 0.2851256727005871, "calibration/coverage@5%": 0.010565985812133073, "calibration/ece": 0.11475980011122759, "calibration/mean_confidence": 0.4282593191460644, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 440.2, "completions/max_terminated_length": 440.2, "completions/mean_length": 157.43359375, "completions/mean_terminated_length": 157.4958282470703, "completions/min_length": 41.4, "completions/min_terminated_length": 65.8, "epoch": 0.192, "grad_norm": 0.0011922204867005348, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 198107702.0, "reward": 0.9124896407127381, "reward_std": 0.10086087137460709, "rewards/accuracy_reward": 0.47177734375, "rewards/brier_reward": 0.7537966251373291, "rewards/confidence_uniqueness_reward": 0.9138024330139161, "rewards/format_reward": 0.99931640625, "rewards/frontier_coverage_0": 0.10172683596611024, "rewards/frontier_coverage_1": 0.10172683596611024, "rewards/frontier_coverage_10": 0.10172683596611024, "rewards/frontier_coverage_15": 0.10172683596611024, "rewards/frontier_coverage_20": 0.10172683596611024, "rewards/frontier_coverage_25": 0.10172683596611024, "rewards/frontier_coverage_5": 0.10172683596611024, "signal/accuracy_reward/centered_abs_mean": 0.146429443359375, "signal/accuracy_reward/group_std_mean": 0.1905221551656723, "signal/accuracy_reward/group_zero_std_frac": 0.46875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0732147216796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0732147216796875, "signal/advantage_abs_mean": 0.0779627725481987, "signal/advantage_pre_scale_abs_mean": 0.0779627725481987, "signal/advantage_pre_scale_std": 0.1176736056804657, "signal/advantage_std": 0.1176736056804657, "signal/brier_reward/centered_abs_mean": 0.1688424438238144, "signal/brier_reward/group_std_mean": 0.2119818925857544, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0168842451646924, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0168842451646924, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05015767216682434, "signal/confidence_uniqueness_reward/group_std_mean": 0.06539249867200851, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005015767458826303, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005015767458826303, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_coverage_0/centered_abs_mean": 0.24883936941623688, "signal/frontier_coverage_0/group_std_mean": 0.31324060559272765, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_1/centered_abs_mean": 0.24883936941623688, "signal/frontier_coverage_1/group_std_mean": 0.31324060559272765, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_10/centered_abs_mean": 0.24883936941623688, "signal/frontier_coverage_10/group_std_mean": 0.31324060559272765, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_15/centered_abs_mean": 0.24883936941623688, "signal/frontier_coverage_15/group_std_mean": 0.31324060559272765, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_20/centered_abs_mean": 0.24883936941623688, "signal/frontier_coverage_20/group_std_mean": 0.31324060559272765, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_25/centered_abs_mean": 0.24883936941623688, "signal/frontier_coverage_25/group_std_mean": 0.31324060559272765, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_5/centered_abs_mean": 0.24883936941623688, "signal/frontier_coverage_5/group_std_mean": 0.31324060559272765, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003558403067290783, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003558403067290783, "step": 60 }, { "calibration/aurc": 0.30967251532063694, "calibration/batch_distribution_entropy": 0.9789505653815354, "calibration/buffer_distribution_entropy": 0.9463065200388749, "calibration/confidence_entropy": 0.5029713373170281, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.075, "calibration/coverage@15%": 0.179296875, "calibration/coverage@20%": 0.304296875, "calibration/coverage@25%": 0.48671875, "calibration/coverage@30%": 0.541796875, "calibration/coverage@5%": 0.015625, "calibration/ece": 0.1678404179115396, "calibration/mean_confidence": 0.4842072225556321, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 428.2, "completions/max_terminated_length": 428.2, "completions/mean_length": 165.9119140625, "completions/mean_terminated_length": 165.9119140625, "completions/min_length": 74.8, "completions/min_terminated_length": 74.8, "epoch": 0.208, "grad_norm": 0.001235796487890184, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 214838864.0, "reward": 0.9380666136741638, "reward_std": 0.10160990357398987, "rewards/accuracy_reward": 0.52392578125, "rewards/brier_reward": 0.7590964794158935, "rewards/confidence_uniqueness_reward": 0.9277396678924561, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.07510250061750412, "rewards/frontier_coverage_1": 0.07510250061750412, "rewards/frontier_coverage_10": 0.07510250061750412, "rewards/frontier_coverage_15": 0.07510250061750412, "rewards/frontier_coverage_20": 0.07510250061750412, "rewards/frontier_coverage_25": 0.07510250061750412, "rewards/frontier_coverage_5": 0.07510250061750412, "signal/accuracy_reward/centered_abs_mean": 0.139874267578125, "signal/accuracy_reward/group_std_mean": 0.1816681444644928, "signal/accuracy_reward/group_zero_std_frac": 0.4875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0699371337890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0699371337890625, "signal/advantage_abs_mean": 0.0794785276055336, "signal/advantage_pre_scale_abs_mean": 0.0794785276055336, "signal/advantage_pre_scale_std": 0.12093794941902161, "signal/advantage_std": 0.12093794941902161, "signal/brier_reward/centered_abs_mean": 0.17095426023006438, "signal/brier_reward/group_std_mean": 0.21720809936523439, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017095426470041274, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017095426470041274, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039772205799818036, "signal/confidence_uniqueness_reward/group_std_mean": 0.04772929325699806, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003977220831438899, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003977220831438899, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.229102823138237, "signal/frontier_coverage_0/group_std_mean": 0.29095078706741334, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_1/centered_abs_mean": 0.229102823138237, "signal/frontier_coverage_1/group_std_mean": 0.29095078706741334, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_10/centered_abs_mean": 0.229102823138237, "signal/frontier_coverage_10/group_std_mean": 0.29095078706741334, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_15/centered_abs_mean": 0.229102823138237, "signal/frontier_coverage_15/group_std_mean": 0.29095078706741334, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_20/centered_abs_mean": 0.229102823138237, "signal/frontier_coverage_20/group_std_mean": 0.29095078706741334, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_25/centered_abs_mean": 0.229102823138237, "signal/frontier_coverage_25/group_std_mean": 0.29095078706741334, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_5/centered_abs_mean": 0.229102823138237, "signal/frontier_coverage_5/group_std_mean": 0.29095078706741334, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003276170324534178, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003276170324534178, "step": 65 }, { "calibration/aurc": 0.3461947025366329, "calibration/batch_distribution_entropy": 0.9871950056652427, "calibration/buffer_distribution_entropy": 0.9528396482217868, "calibration/confidence_entropy": 0.47853435639134895, "calibration/coverage@0%": 0.00390625, "calibration/coverage@1%": 0.00390625, "calibration/coverage@10%": 0.026998226516634048, "calibration/coverage@15%": 0.1576489114481409, "calibration/coverage@20%": 0.2221891817514677, "calibration/coverage@25%": 0.3430818860078278, "calibration/coverage@30%": 0.44168909001956946, "calibration/coverage@5%": 0.00390625, "calibration/ece": 0.1482485175463591, "calibration/mean_confidence": 0.4928267662792173, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 458.4, "completions/max_terminated_length": 458.4, "completions/mean_length": 169.5767578125, "completions/mean_terminated_length": 169.69363708496093, "completions/min_length": 14.6, "completions/min_terminated_length": 73.2, "epoch": 0.224, "grad_norm": 0.0013179010711610317, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 231728514.0, "reward": 0.9151730179786682, "reward_std": 0.10305744558572769, "rewards/accuracy_reward": 0.4703125, "rewards/brier_reward": 0.7591888546943665, "rewards/confidence_uniqueness_reward": 0.9291624546051025, "rewards/format_reward": 0.99921875, "rewards/frontier_coverage_0": 0.11560697555541992, "rewards/frontier_coverage_1": 0.11560697555541992, "rewards/frontier_coverage_10": 0.11560697555541992, "rewards/frontier_coverage_15": 0.11560697555541992, "rewards/frontier_coverage_20": 0.11560697555541992, "rewards/frontier_coverage_25": 0.11560697555541992, "rewards/frontier_coverage_5": 0.11560697555541992, "signal/accuracy_reward/centered_abs_mean": 0.12928466796875, "signal/accuracy_reward/group_std_mean": 0.17319436967372895, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.064642333984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.064642333984375, "signal/advantage_abs_mean": 0.07788805365562439, "signal/advantage_pre_scale_abs_mean": 0.07788805365562439, "signal/advantage_pre_scale_std": 0.12274336367845536, "signal/advantage_std": 0.12274336367845536, "signal/brier_reward/centered_abs_mean": 0.1776938557624817, "signal/brier_reward/group_std_mean": 0.22498490810394287, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01776938550174236, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01776938550174236, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04329204186797142, "signal/confidence_uniqueness_reward/group_std_mean": 0.05236217975616455, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004329204373061657, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004329204373061657, "signal/format_reward/centered_abs_mean": 0.00150146484375, "signal/format_reward/group_std_mean": 0.004083108901977539, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000750732421875, "signal/frontier_coverage_0/centered_abs_mean": 0.2169666290283203, "signal/frontier_coverage_0/group_std_mean": 0.2803891360759735, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_1/centered_abs_mean": 0.2169666290283203, "signal/frontier_coverage_1/group_std_mean": 0.2803891360759735, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_10/centered_abs_mean": 0.2169666290283203, "signal/frontier_coverage_10/group_std_mean": 0.2803891360759735, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_15/centered_abs_mean": 0.2169666290283203, "signal/frontier_coverage_15/group_std_mean": 0.2803891360759735, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_20/centered_abs_mean": 0.2169666290283203, "signal/frontier_coverage_20/group_std_mean": 0.2803891360759735, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_25/centered_abs_mean": 0.2169666290283203, "signal/frontier_coverage_25/group_std_mean": 0.2803891360759735, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_5/centered_abs_mean": 0.2169666290283203, "signal/frontier_coverage_5/group_std_mean": 0.2803891360759735, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031026228331029414, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031026228331029414, "step": 70 }, { "calibration/aurc": 0.3850509745798207, "calibration/batch_distribution_entropy": 0.9636978993198815, "calibration/buffer_distribution_entropy": 0.9581510600414204, "calibration/confidence_entropy": 0.4726358455065284, "calibration/coverage@0%": 0.01171875, "calibration/coverage@1%": 0.01171875, "calibration/coverage@10%": 0.128125, "calibration/coverage@15%": 0.19259112035225048, "calibration/coverage@20%": 0.21604696673189822, "calibration/coverage@25%": 0.23679060665362034, "calibration/coverage@30%": 0.24187866927592952, "calibration/coverage@5%": 0.07578125, "calibration/ece": 0.18973087646757542, "calibration/mean_confidence": 0.5367774832772495, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 453.8, "completions/max_terminated_length": 453.8, "completions/mean_length": 174.0443359375, "completions/mean_terminated_length": 174.1128356933594, "completions/min_length": 47.4, "completions/min_terminated_length": 79.0, "epoch": 0.24, "grad_norm": 0.0013802044559270144, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 248762408.0, "reward": 0.944013798236847, "reward_std": 0.11029932498931885, "rewards/accuracy_reward": 0.53603515625, "rewards/brier_reward": 0.7548027634620667, "rewards/confidence_uniqueness_reward": 0.934007465839386, "rewards/format_reward": 0.9994140625, "rewards/frontier_coverage_0": 0.07400779929012061, "rewards/frontier_coverage_1": 0.07400779929012061, "rewards/frontier_coverage_10": 0.07400779929012061, "rewards/frontier_coverage_15": 0.07400779929012061, "rewards/frontier_coverage_20": 0.07400779929012061, "rewards/frontier_coverage_25": 0.07400779929012061, "rewards/frontier_coverage_5": 0.07400779929012061, "signal/accuracy_reward/centered_abs_mean": 0.148919677734375, "signal/accuracy_reward/group_std_mean": 0.19519998431205748, "signal/accuracy_reward/group_zero_std_frac": 0.45, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0744598388671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0744598388671875, "signal/advantage_abs_mean": 0.08497563004493713, "signal/advantage_pre_scale_abs_mean": 0.08497563004493713, "signal/advantage_pre_scale_std": 0.13136209100484847, "signal/advantage_std": 0.13136209100484847, "signal/brier_reward/centered_abs_mean": 0.1855131357908249, "signal/brier_reward/group_std_mean": 0.2328798860311508, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018551314249634743, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018551314249634743, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04416573867201805, "signal/confidence_uniqueness_reward/group_std_mean": 0.053702594339847566, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004416573978960514, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004416573978960514, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_coverage_0/centered_abs_mean": 0.21642023622989653, "signal/frontier_coverage_0/group_std_mean": 0.283210289478302, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_1/centered_abs_mean": 0.21642023622989653, "signal/frontier_coverage_1/group_std_mean": 0.283210289478302, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_10/centered_abs_mean": 0.21642023622989653, "signal/frontier_coverage_10/group_std_mean": 0.283210289478302, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_15/centered_abs_mean": 0.21642023622989653, "signal/frontier_coverage_15/group_std_mean": 0.283210289478302, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_20/centered_abs_mean": 0.21642023622989653, "signal/frontier_coverage_20/group_std_mean": 0.283210289478302, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_25/centered_abs_mean": 0.21642023622989653, "signal/frontier_coverage_25/group_std_mean": 0.283210289478302, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_5/centered_abs_mean": 0.21642023622989653, "signal/frontier_coverage_5/group_std_mean": 0.283210289478302, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030948093626648188, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030948093626648188, "step": 75 }, { "calibration/aurc": 0.309607338599339, "calibration/batch_distribution_entropy": 0.9670440695097444, "calibration/buffer_distribution_entropy": 0.9620481829535976, "calibration/confidence_entropy": 0.4398015262158295, "calibration/coverage@0%": 0.004307598039215686, "calibration/coverage@1%": 0.004307598039215686, "calibration/coverage@10%": 0.09665747549019607, "calibration/coverage@15%": 0.237280943627451, "calibration/coverage@20%": 0.34340226715686273, "calibration/coverage@25%": 0.4869592524509804, "calibration/coverage@30%": 0.6019806985294117, "calibration/coverage@5%": 0.00940563725490196, "calibration/ece": 0.12086222182290382, "calibration/mean_confidence": 0.5231612283193191, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 597.2, "completions/max_terminated_length": 597.2, "completions/mean_length": 172.6607421875, "completions/mean_terminated_length": 172.8123809814453, "completions/min_length": 14.2, "completions/min_terminated_length": 78.0, "epoch": 0.256, "grad_norm": 0.0012472033267840743, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 265585270.0, "reward": 0.933080542087555, "reward_std": 0.10499893128871918, "rewards/accuracy_reward": 0.50439453125, "rewards/brier_reward": 0.7625807642936706, "rewards/confidence_uniqueness_reward": 0.9408350467681885, "rewards/format_reward": 0.99873046875, "rewards/frontier_coverage_0": 0.11165271587669849, "rewards/frontier_coverage_1": 0.11165271587669849, "rewards/frontier_coverage_10": 0.11165271587669849, "rewards/frontier_coverage_15": 0.11165271587669849, "rewards/frontier_coverage_20": 0.11165271587669849, "rewards/frontier_coverage_25": 0.11165271587669849, "rewards/frontier_coverage_5": 0.11165271587669849, "signal/accuracy_reward/centered_abs_mean": 0.141839599609375, "signal/accuracy_reward/group_std_mean": 0.18213264644145966, "signal/accuracy_reward/group_zero_std_frac": 0.490625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0709197998046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0709197998046875, "signal/advantage_abs_mean": 0.08127593994140625, "signal/advantage_pre_scale_abs_mean": 0.08127593994140625, "signal/advantage_pre_scale_std": 0.12862132340669633, "signal/advantage_std": 0.12862132340669633, "signal/brier_reward/centered_abs_mean": 0.17898644208908082, "signal/brier_reward/group_std_mean": 0.22654514908790588, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017898644879460336, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017898644879460336, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.039608802646398544, "signal/confidence_uniqueness_reward/group_std_mean": 0.05151473581790924, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003960880218073726, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003960880218073726, "signal/format_reward/centered_abs_mean": 0.002459716796875, "signal/format_reward/group_std_mean": 0.007181553030386567, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375, "signal/frontier_coverage_0/centered_abs_mean": 0.21713967025279998, "signal/frontier_coverage_0/group_std_mean": 0.2821187674999237, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_1/centered_abs_mean": 0.21713967025279998, "signal/frontier_coverage_1/group_std_mean": 0.2821187674999237, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_10/centered_abs_mean": 0.21713967025279998, "signal/frontier_coverage_10/group_std_mean": 0.2821187674999237, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_15/centered_abs_mean": 0.21713967025279998, "signal/frontier_coverage_15/group_std_mean": 0.2821187674999237, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_20/centered_abs_mean": 0.21713967025279998, "signal/frontier_coverage_20/group_std_mean": 0.2821187674999237, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_25/centered_abs_mean": 0.21713967025279998, "signal/frontier_coverage_25/group_std_mean": 0.2821187674999237, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_5/centered_abs_mean": 0.21713967025279998, "signal/frontier_coverage_5/group_std_mean": 0.2821187674999237, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00310509717091918, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00310509717091918, "step": 80 }, { "calibration/aurc": 0.3896367144097038, "calibration/batch_distribution_entropy": 0.9838301207068794, "calibration/buffer_distribution_entropy": 0.9661401903129894, "calibration/confidence_entropy": 0.46300136102847683, "calibration/coverage@0%": 0.008991254892367906, "calibration/coverage@1%": 0.008991254892367906, "calibration/coverage@10%": 0.0543068126223092, "calibration/coverage@15%": 0.10274431262230918, "calibration/coverage@20%": 0.1363380626223092, "calibration/coverage@25%": 0.2075288955479452, "calibration/coverage@30%": 0.2786868578767123, "calibration/coverage@5%": 0.026960004892367904, "calibration/ece": 0.1511877769353029, "calibration/mean_confidence": 0.4922436817696364, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 424.2, "completions/max_terminated_length": 424.2, "completions/mean_length": 180.82041015625, "completions/mean_terminated_length": 180.90801086425782, "completions/min_length": 17.2, "completions/min_terminated_length": 82.6, "epoch": 0.272, "grad_norm": 0.0010296551045030355, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 282402567.0, "reward": 0.9305618286132813, "reward_std": 0.09933947324752808, "rewards/accuracy_reward": 0.49638671875, "rewards/brier_reward": 0.7602882385253906, "rewards/confidence_uniqueness_reward": 0.9493825793266296, "rewards/format_reward": 0.99931640625, "rewards/frontier_coverage_0": 0.11731471288949251, "rewards/frontier_coverage_1": 0.11731471288949251, "rewards/frontier_coverage_10": 0.11731471288949251, "rewards/frontier_coverage_15": 0.11731471288949251, "rewards/frontier_coverage_20": 0.11731471288949251, "rewards/frontier_coverage_25": 0.11731471288949251, "rewards/frontier_coverage_5": 0.11731471288949251, "signal/accuracy_reward/centered_abs_mean": 0.131195068359375, "signal/accuracy_reward/group_std_mean": 0.16806706488132478, "signal/accuracy_reward/group_zero_std_frac": 0.5375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0655975341796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0655975341796875, "signal/advantage_abs_mean": 0.0770923689007759, "signal/advantage_pre_scale_abs_mean": 0.0770923689007759, "signal/advantage_pre_scale_std": 0.12059997916221618, "signal/advantage_std": 0.12059997916221618, "signal/brier_reward/centered_abs_mean": 0.18005068302154542, "signal/brier_reward/group_std_mean": 0.2274363726377487, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018005067855119704, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018005067855119704, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029679254069924353, "signal/confidence_uniqueness_reward/group_std_mean": 0.03936323225498199, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029679253231734036, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029679253231734036, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_coverage_0/centered_abs_mean": 0.2299924910068512, "signal/frontier_coverage_0/group_std_mean": 0.29544530510902406, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_1/centered_abs_mean": 0.2299924910068512, "signal/frontier_coverage_1/group_std_mean": 0.29544530510902406, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_10/centered_abs_mean": 0.2299924910068512, "signal/frontier_coverage_10/group_std_mean": 0.29544530510902406, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_15/centered_abs_mean": 0.2299924910068512, "signal/frontier_coverage_15/group_std_mean": 0.29544530510902406, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_20/centered_abs_mean": 0.2299924910068512, "signal/frontier_coverage_20/group_std_mean": 0.29544530510902406, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_25/centered_abs_mean": 0.2299924910068512, "signal/frontier_coverage_25/group_std_mean": 0.29544530510902406, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_5/centered_abs_mean": 0.2299924910068512, "signal/frontier_coverage_5/group_std_mean": 0.29544530510902406, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032888925168663265, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032888925168663265, "step": 85 }, { "calibration/aurc": 0.33589424683068303, "calibration/batch_distribution_entropy": 0.9830515504914701, "calibration/buffer_distribution_entropy": 0.9701992389099413, "calibration/confidence_entropy": 0.46065174492897754, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.05237898284313726, "calibration/coverage@15%": 0.10512558876290243, "calibration/coverage@20%": 0.13598649262787305, "calibration/coverage@25%": 0.2674431951719044, "calibration/coverage@30%": 0.37270895087487055, "calibration/coverage@5%": 0.011363357843137255, "calibration/ece": 0.13230197746702937, "calibration/mean_confidence": 0.5004645331904743, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 453.6, "completions/max_terminated_length": 453.6, "completions/mean_length": 177.79169921875, "completions/mean_terminated_length": 177.91405029296874, "completions/min_length": 31.0, "completions/min_terminated_length": 80.6, "epoch": 0.288, "grad_norm": 0.0011328975670039654, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 299181330.0, "reward": 0.9354893803596497, "reward_std": 0.10109454691410065, "rewards/accuracy_reward": 0.50869140625, "rewards/brier_reward": 0.7558692455291748, "rewards/confidence_uniqueness_reward": 0.9514668345451355, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.10887458622455597, "rewards/frontier_coverage_1": 0.10887458622455597, "rewards/frontier_coverage_10": 0.10887458622455597, "rewards/frontier_coverage_15": 0.10887458622455597, "rewards/frontier_coverage_20": 0.10887458622455597, "rewards/frontier_coverage_25": 0.10887458622455597, "rewards/frontier_coverage_5": 0.10887458622455597, "signal/accuracy_reward/centered_abs_mean": 0.139923095703125, "signal/accuracy_reward/group_std_mean": 0.1817190706729889, "signal/accuracy_reward/group_zero_std_frac": 0.490625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0699615478515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0699615478515625, "signal/advantage_abs_mean": 0.07745532691478729, "signal/advantage_pre_scale_abs_mean": 0.07745532691478729, "signal/advantage_pre_scale_std": 0.12147206962108612, "signal/advantage_std": 0.12147206962108612, "signal/brier_reward/centered_abs_mean": 0.1812896490097046, "signal/brier_reward/group_std_mean": 0.2296443372964859, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01812896430492401, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01812896430492401, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027077151462435722, "signal/confidence_uniqueness_reward/group_std_mean": 0.03699265941977501, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027077150996774437, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027077150996774437, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.2429211437702179, "signal/frontier_coverage_0/group_std_mean": 0.30920409560203554, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_1/centered_abs_mean": 0.2429211437702179, "signal/frontier_coverage_1/group_std_mean": 0.30920409560203554, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_10/centered_abs_mean": 0.2429211437702179, "signal/frontier_coverage_10/group_std_mean": 0.30920409560203554, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_15/centered_abs_mean": 0.2429211437702179, "signal/frontier_coverage_15/group_std_mean": 0.30920409560203554, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_20/centered_abs_mean": 0.2429211437702179, "signal/frontier_coverage_20/group_std_mean": 0.30920409560203554, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_25/centered_abs_mean": 0.2429211437702179, "signal/frontier_coverage_25/group_std_mean": 0.30920409560203554, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_5/centered_abs_mean": 0.2429211437702179, "signal/frontier_coverage_5/group_std_mean": 0.30920409560203554, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034737725276499988, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034737725276499988, "step": 90 }, { "calibration/aurc": 0.3082216996890913, "calibration/batch_distribution_entropy": 0.9745983733554165, "calibration/buffer_distribution_entropy": 0.9733239639462091, "calibration/confidence_entropy": 0.4559858287117121, "calibration/coverage@0%": 0.006652879901960784, "calibration/coverage@1%": 0.006652879901960784, "calibration/coverage@10%": 0.09274963367196194, "calibration/coverage@15%": 0.1577411679569088, "calibration/coverage@20%": 0.26820531037757567, "calibration/coverage@25%": 0.3374852539474694, "calibration/coverage@30%": 0.526755844461072, "calibration/coverage@5%": 0.03674938725490196, "calibration/ece": 0.14556641294167272, "calibration/mean_confidence": 0.4999809672027273, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 588.4, "completions/max_terminated_length": 588.4, "completions/mean_length": 183.04345703125, "completions/mean_terminated_length": 183.18773193359374, "completions/min_length": 15.8, "completions/min_terminated_length": 85.8, "epoch": 0.304, "grad_norm": 0.0011761499335989356, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 315985647.0, "reward": 0.9316917419433594, "reward_std": 0.09645482301712036, "rewards/accuracy_reward": 0.50341796875, "rewards/brier_reward": 0.7473922848701477, "rewards/confidence_uniqueness_reward": 0.9497161388397217, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.10749451220035552, "rewards/frontier_coverage_1": 0.10749451220035552, "rewards/frontier_coverage_10": 0.10749451220035552, "rewards/frontier_coverage_15": 0.10749451220035552, "rewards/frontier_coverage_20": 0.10749451220035552, "rewards/frontier_coverage_25": 0.10749451220035552, "rewards/frontier_coverage_5": 0.10749451220035552, "signal/accuracy_reward/centered_abs_mean": 0.131561279296875, "signal/accuracy_reward/group_std_mean": 0.17328137457370757, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0657806396484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0657806396484375, "signal/advantage_abs_mean": 0.07330347746610641, "signal/advantage_pre_scale_abs_mean": 0.07330347746610641, "signal/advantage_pre_scale_std": 0.11479498445987701, "signal/advantage_std": 0.11479498445987701, "signal/brier_reward/centered_abs_mean": 0.1852072387933731, "signal/brier_reward/group_std_mean": 0.23257068395614625, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018520724028348923, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018520724028348923, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02673105485737324, "signal/confidence_uniqueness_reward/group_std_mean": 0.03708546310663223, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026731055695563555, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026731055695563555, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_std_mean": 0.005187963135540485, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_coverage_0/centered_abs_mean": 0.2502656430006027, "signal/frontier_coverage_0/group_std_mean": 0.3177207946777344, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_1/centered_abs_mean": 0.2502656430006027, "signal/frontier_coverage_1/group_std_mean": 0.3177207946777344, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_10/centered_abs_mean": 0.2502656430006027, "signal/frontier_coverage_10/group_std_mean": 0.3177207946777344, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_15/centered_abs_mean": 0.2502656430006027, "signal/frontier_coverage_15/group_std_mean": 0.3177207946777344, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_20/centered_abs_mean": 0.2502656430006027, "signal/frontier_coverage_20/group_std_mean": 0.3177207946777344, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_25/centered_abs_mean": 0.2502656430006027, "signal/frontier_coverage_25/group_std_mean": 0.3177207946777344, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_5/centered_abs_mean": 0.2502656430006027, "signal/frontier_coverage_5/group_std_mean": 0.3177207946777344, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035787987522780894, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035787987522780894, "step": 95 }, { "calibration/aurc": 0.2517693569418261, "calibration/batch_distribution_entropy": 0.9731332859372502, "calibration/buffer_distribution_entropy": 0.9755470528339438, "calibration/confidence_entropy": 0.44581740909361167, "calibration/coverage@0%": 0.0125, "calibration/coverage@1%": 0.0125, "calibration/coverage@10%": 0.175390625, "calibration/coverage@15%": 0.357421875, "calibration/coverage@20%": 0.455078125, "calibration/coverage@25%": 0.553515625, "calibration/coverage@30%": 0.68046875, "calibration/coverage@5%": 0.091796875, "calibration/ece": 0.1454404464407266, "calibration/mean_confidence": 0.5241102003842935, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 612.2, "completions/max_terminated_length": 612.2, "completions/mean_length": 186.9810546875, "completions/mean_terminated_length": 187.07222900390624, "completions/min_length": 51.2, "completions/min_terminated_length": 81.8, "epoch": 0.32, "grad_norm": 0.0009133943822234869, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 332989037.0, "reward": 0.9471798062324523, "reward_std": 0.082899671792984, "rewards/accuracy_reward": 0.528515625, "rewards/brier_reward": 0.767364501953125, "rewards/confidence_uniqueness_reward": 0.9538532257080078, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.11033322885632516, "rewards/frontier_coverage_1": 0.11033322885632516, "rewards/frontier_coverage_10": 0.11033322885632516, "rewards/frontier_coverage_15": 0.11033322885632516, "rewards/frontier_coverage_20": 0.11033322885632516, "rewards/frontier_coverage_25": 0.11033322885632516, "rewards/frontier_coverage_5": 0.11033322885632516, "signal/accuracy_reward/centered_abs_mean": 0.0957275390625, "signal/accuracy_reward/group_std_mean": 0.13498952388763427, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04786376953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04786376953125, "signal/advantage_abs_mean": 0.06182228252291679, "signal/advantage_pre_scale_abs_mean": 0.06182228252291679, "signal/advantage_pre_scale_std": 0.10263902097940444, "signal/advantage_std": 0.10263902097940444, "signal/brier_reward/centered_abs_mean": 0.1715441018342972, "signal/brier_reward/group_std_mean": 0.21891236901283265, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01715441085398197, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01715441085398197, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023247013986110687, "signal/confidence_uniqueness_reward/group_std_mean": 0.03069368377327919, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023247014032676816, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023247014032676816, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_std_mean": 0.0024258273653686045, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_coverage_0/centered_abs_mean": 0.21744668185710908, "signal/frontier_coverage_0/group_std_mean": 0.2794205367565155, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_1/centered_abs_mean": 0.21744668185710908, "signal/frontier_coverage_1/group_std_mean": 0.2794205367565155, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_10/centered_abs_mean": 0.21744668185710908, "signal/frontier_coverage_10/group_std_mean": 0.2794205367565155, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_15/centered_abs_mean": 0.21744668185710908, "signal/frontier_coverage_15/group_std_mean": 0.2794205367565155, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_20/centered_abs_mean": 0.21744668185710908, "signal/frontier_coverage_20/group_std_mean": 0.2794205367565155, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_25/centered_abs_mean": 0.21744668185710908, "signal/frontier_coverage_25/group_std_mean": 0.2794205367565155, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_5/centered_abs_mean": 0.21744668185710908, "signal/frontier_coverage_5/group_std_mean": 0.2794205367565155, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003109487472102046, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003109487472102046, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.4931819979797931, "eval_calibration/batch_distribution_entropy": 0.8744966415146431, "eval_calibration/buffer_distribution_entropy": 0.9766198744091587, "eval_calibration/confidence_entropy": 0.41882069482680984, "eval_calibration/coverage@0%": 0.0234375, "eval_calibration/coverage@1%": 0.0234375, "eval_calibration/coverage@10%": 0.0234375, "eval_calibration/coverage@15%": 0.078125, "eval_calibration/coverage@20%": 0.078125, "eval_calibration/coverage@25%": 0.1171875, "eval_calibration/coverage@30%": 0.203125, "eval_calibration/coverage@5%": 0.0234375, "eval_calibration/ece": 0.24737018579060577, "eval_calibration/mean_confidence": 0.44411252086535724, "eval_completions/clipped_ratio": 0.002155172413793094, "eval_completions/max_length": 431.5, "eval_completions/max_terminated_length": 431.5, "eval_completions/mean_length": 193.44598770141602, "eval_completions/mean_terminated_length": 193.86253356933594, "eval_completions/min_length": 78.75, "eval_completions/min_terminated_length": 106.75, "eval_loss": 0.0, "eval_num_tokens": 332989037.0, "eval_reward": 0.8956393599510193, "eval_reward_std": 0.21798087283968925, "eval_rewards/accuracy_reward": 0.41796875, "eval_rewards/brier_reward": 0.779339388012886, "eval_rewards/confidence_uniqueness_reward": 0.891444057226181, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_coverage_0": 0.20532679185271263, "eval_rewards/frontier_coverage_1": 0.20532679185271263, "eval_rewards/frontier_coverage_10": 0.20532679185271263, "eval_rewards/frontier_coverage_15": 0.20532679185271263, "eval_rewards/frontier_coverage_20": 0.20532679185271263, "eval_rewards/frontier_coverage_25": 0.20532679185271263, "eval_rewards/frontier_coverage_5": 0.20532679185271263, "eval_runtime": 30.2085, "eval_samples_per_second": 16.552, "eval_signal/accuracy_reward/centered_abs_mean": 0.474609375, "eval_signal/accuracy_reward/group_std_mean": 0.4946169927716255, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2373046875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2373046875, "eval_signal/advantage_abs_mean": 0.19964107125997543, "eval_signal/advantage_pre_scale_abs_mean": 0.19964107125997543, "eval_signal/advantage_pre_scale_std": 0.21583576127886772, "eval_signal/advantage_std": 0.21583576127886772, "eval_signal/brier_reward/centered_abs_mean": 0.21933908015489578, "eval_signal/brier_reward/group_std_mean": 0.27718352526426315, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021933908574283123, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021933908574283123, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.049210578203201294, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.060637932270765305, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004921057727187872, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004921057727187872, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.41015545278787613, "eval_signal/frontier_coverage_0/group_std_mean": 0.5025398880243301, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.41015545278787613, "eval_signal/frontier_coverage_1/group_std_mean": 0.5025398880243301, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.41015545278787613, "eval_signal/frontier_coverage_10/group_std_mean": 0.5025398880243301, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.41015545278787613, "eval_signal/frontier_coverage_15/group_std_mean": 0.5025398880243301, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.41015545278787613, "eval_signal/frontier_coverage_20/group_std_mean": 0.5025398880243301, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.41015545278787613, "eval_signal/frontier_coverage_25/group_std_mean": 0.5025398880243301, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.41015545278787613, "eval_signal/frontier_coverage_5/group_std_mean": 0.5025398880243301, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005865223123691976, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005865223123691976, "eval_steps_per_second": 0.132, "step": 100 }, { "calibration/aurc": 0.31803164040980525, "calibration/batch_distribution_entropy": 0.9603039380902738, "calibration/buffer_distribution_entropy": 0.9789385627675109, "calibration/confidence_entropy": 0.4421431351101009, "calibration/coverage@0%": 0.004296875, "calibration/coverage@1%": 0.004296875, "calibration/coverage@10%": 0.01328125, "calibration/coverage@15%": 0.06171875, "calibration/coverage@20%": 0.12617951932485322, "calibration/coverage@25%": 0.32703644814090016, "calibration/coverage@30%": 0.5084095217710372, "calibration/coverage@5%": 0.004296875, "calibration/ece": 0.1597455389352755, "calibration/mean_confidence": 0.4623863560801051, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 586.4, "completions/max_terminated_length": 586.4, "completions/mean_length": 194.1740234375, "completions/mean_terminated_length": 194.2508087158203, "completions/min_length": 16.8, "completions/min_terminated_length": 83.4, "epoch": 0.336, "grad_norm": 0.0010002412600442767, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 349699811.0, "reward": 0.946380615234375, "reward_std": 0.09110937118530274, "rewards/accuracy_reward": 0.53134765625, "rewards/brier_reward": 0.7578034162521362, "rewards/confidence_uniqueness_reward": 0.953113317489624, "rewards/format_reward": 0.99931640625, "rewards/frontier_coverage_0": 0.09946960732340812, "rewards/frontier_coverage_1": 0.09946960732340812, "rewards/frontier_coverage_10": 0.09946960732340812, "rewards/frontier_coverage_15": 0.09946960732340812, "rewards/frontier_coverage_20": 0.09946960732340812, "rewards/frontier_coverage_25": 0.09946960732340812, "rewards/frontier_coverage_5": 0.09946960732340812, "signal/accuracy_reward/centered_abs_mean": 0.113885498046875, "signal/accuracy_reward/group_std_mean": 0.15445185005664824, "signal/accuracy_reward/group_zero_std_frac": 0.540625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0569427490234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0569427490234375, "signal/advantage_abs_mean": 0.06859076172113418, "signal/advantage_pre_scale_abs_mean": 0.06859076172113418, "signal/advantage_pre_scale_std": 0.11051983535289764, "signal/advantage_std": 0.11051983535289764, "signal/brier_reward/centered_abs_mean": 0.17820558547973633, "signal/brier_reward/group_std_mean": 0.22512085735797882, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017820559069514276, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017820559069514276, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023565568774938584, "signal/confidence_uniqueness_reward/group_std_mean": 0.03247289955615997, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023565569426864386, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023565569426864386, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_coverage_0/centered_abs_mean": 0.2296843409538269, "signal/frontier_coverage_0/group_std_mean": 0.2940028965473175, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_1/centered_abs_mean": 0.2296843409538269, "signal/frontier_coverage_1/group_std_mean": 0.2940028965473175, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_10/centered_abs_mean": 0.2296843409538269, "signal/frontier_coverage_10/group_std_mean": 0.2940028965473175, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_15/centered_abs_mean": 0.2296843409538269, "signal/frontier_coverage_15/group_std_mean": 0.2940028965473175, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_20/centered_abs_mean": 0.2296843409538269, "signal/frontier_coverage_20/group_std_mean": 0.2940028965473175, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_25/centered_abs_mean": 0.2296843409538269, "signal/frontier_coverage_25/group_std_mean": 0.2940028965473175, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_5/centered_abs_mean": 0.2296843409538269, "signal/frontier_coverage_5/group_std_mean": 0.2940028965473175, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003284486150369048, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003284486150369048, "step": 105 }, { "calibration/aurc": 0.33995086829546933, "calibration/batch_distribution_entropy": 0.9172722143051842, "calibration/buffer_distribution_entropy": 0.9850987655947868, "calibration/confidence_entropy": 0.39972877336110646, "calibration/coverage@0%": 0.006271440082882468, "calibration/coverage@1%": 0.006271440082882468, "calibration/coverage@10%": 0.1462585299429609, "calibration/coverage@15%": 0.268205854981136, "calibration/coverage@20%": 0.3480707272055084, "calibration/coverage@25%": 0.40918934329610385, "calibration/coverage@30%": 0.46561599619623495, "calibration/coverage@5%": 0.014506734200529527, "calibration/ece": 0.14900923213787592, "calibration/mean_confidence": 0.42059892334335897, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 620.8, "completions/max_terminated_length": 620.8, "completions/mean_length": 200.08525390625, "completions/mean_terminated_length": 200.34373168945314, "completions/min_length": 17.2, "completions/min_terminated_length": 92.2, "epoch": 0.352, "grad_norm": 0.001090504345484078, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 367009100.0, "reward": 0.9180683374404908, "reward_std": 0.09404533058404922, "rewards/accuracy_reward": 0.4623046875, "rewards/brier_reward": 0.7684449315071106, "rewards/confidence_uniqueness_reward": 0.9460180521011352, "rewards/format_reward": 0.99853515625, "rewards/frontier_coverage_0": 0.16185927987098694, "rewards/frontier_coverage_1": 0.16185927987098694, "rewards/frontier_coverage_10": 0.16185927987098694, "rewards/frontier_coverage_15": 0.16185927987098694, "rewards/frontier_coverage_20": 0.16185927987098694, "rewards/frontier_coverage_25": 0.16185927987098694, "rewards/frontier_coverage_5": 0.16185927987098694, "signal/accuracy_reward/centered_abs_mean": 0.11962890625, "signal/accuracy_reward/group_std_mean": 0.1559920936822891, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059814453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.059814453125, "signal/advantage_abs_mean": 0.0710756614804268, "signal/advantage_pre_scale_abs_mean": 0.0710756614804268, "signal/advantage_pre_scale_std": 0.11461780071258545, "signal/advantage_std": 0.11461780071258545, "signal/brier_reward/centered_abs_mean": 0.17437887489795684, "signal/brier_reward/group_std_mean": 0.2231445223093033, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01743788719177246, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01743788719177246, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02977934628725052, "signal/confidence_uniqueness_reward/group_std_mean": 0.0412301205098629, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029779347125440834, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029779347125440834, "signal/format_reward/centered_abs_mean": 0.002813720703125, "signal/format_reward/group_std_mean": 0.007613790640607476, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014068603515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014068603515625, "signal/frontier_coverage_0/centered_abs_mean": 0.2336806982755661, "signal/frontier_coverage_0/group_std_mean": 0.2980573236942291, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_1/centered_abs_mean": 0.2336806982755661, "signal/frontier_coverage_1/group_std_mean": 0.2980573236942291, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_10/centered_abs_mean": 0.2336806982755661, "signal/frontier_coverage_10/group_std_mean": 0.2980573236942291, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_15/centered_abs_mean": 0.2336806982755661, "signal/frontier_coverage_15/group_std_mean": 0.2980573236942291, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_20/centered_abs_mean": 0.2336806982755661, "signal/frontier_coverage_20/group_std_mean": 0.2980573236942291, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_25/centered_abs_mean": 0.2336806982755661, "signal/frontier_coverage_25/group_std_mean": 0.2980573236942291, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_5/centered_abs_mean": 0.2336806982755661, "signal/frontier_coverage_5/group_std_mean": 0.2980573236942291, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003341634012758732, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003341634012758732, "step": 110 }, { "calibration/aurc": 0.3871581697347807, "calibration/batch_distribution_entropy": 0.9375974113072278, "calibration/buffer_distribution_entropy": 0.9893474272345537, "calibration/confidence_entropy": 0.4084952818758949, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.00078125, "calibration/coverage@15%": 0.008203125, "calibration/coverage@20%": 0.17043328033268101, "calibration/coverage@25%": 0.3175276724559687, "calibration/coverage@30%": 0.4125, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.16002306553664605, "calibration/mean_confidence": 0.5115657670381608, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00146484375, "completions/max_length": 702.2, "completions/max_terminated_length": 702.2, "completions/mean_length": 202.5775390625, "completions/mean_terminated_length": 202.87572021484374, "completions/min_length": 0.0, "completions/min_terminated_length": 95.2, "epoch": 0.368, "grad_norm": 0.0008818231872282922, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 384148966.0, "reward": 0.9308542728424072, "reward_std": 0.09076899141073227, "rewards/accuracy_reward": 0.49150390625, "rewards/brier_reward": 0.7683249115943909, "rewards/confidence_uniqueness_reward": 0.9473673701286316, "rewards/format_reward": 0.9984375, "rewards/frontier_coverage_0": 0.14300051778554917, "rewards/frontier_coverage_1": 0.14300051778554917, "rewards/frontier_coverage_10": 0.14300051778554917, "rewards/frontier_coverage_15": 0.14300051778554917, "rewards/frontier_coverage_20": 0.14300051778554917, "rewards/frontier_coverage_25": 0.14300051778554917, "rewards/frontier_coverage_5": 0.14300051778554917, "signal/accuracy_reward/centered_abs_mean": 0.110247802734375, "signal/accuracy_reward/group_std_mean": 0.14572769552469253, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0551239013671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0551239013671875, "signal/advantage_abs_mean": 0.06792233437299729, "signal/advantage_pre_scale_abs_mean": 0.06792233437299729, "signal/advantage_pre_scale_std": 0.11378230005502701, "signal/advantage_std": 0.11378230005502701, "signal/brier_reward/centered_abs_mean": 0.17012497782707214, "signal/brier_reward/group_std_mean": 0.21865971982479096, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017012498155236245, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017012498155236245, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028270235285162926, "signal/confidence_uniqueness_reward/group_std_mean": 0.03985480517148972, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002827023435384035, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002827023435384035, "signal/format_reward/centered_abs_mean": 0.00301513671875, "signal/format_reward/group_std_mean": 0.008502526115626097, "signal/format_reward/group_zero_std_frac": 0.953125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001507568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001507568359375, "signal/frontier_coverage_0/centered_abs_mean": 0.21495278179645538, "signal/frontier_coverage_0/group_std_mean": 0.27791267037391665, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_1/centered_abs_mean": 0.21495278179645538, "signal/frontier_coverage_1/group_std_mean": 0.27791267037391665, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_10/centered_abs_mean": 0.21495278179645538, "signal/frontier_coverage_10/group_std_mean": 0.27791267037391665, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_15/centered_abs_mean": 0.21495278179645538, "signal/frontier_coverage_15/group_std_mean": 0.27791267037391665, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_20/centered_abs_mean": 0.21495278179645538, "signal/frontier_coverage_20/group_std_mean": 0.27791267037391665, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_25/centered_abs_mean": 0.21495278179645538, "signal/frontier_coverage_25/group_std_mean": 0.27791267037391665, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_5/centered_abs_mean": 0.21495278179645538, "signal/frontier_coverage_5/group_std_mean": 0.27791267037391665, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030738247092813253, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030738247092813253, "step": 115 }, { "calibration/aurc": 0.33271643651355365, "calibration/batch_distribution_entropy": 0.9126303336176506, "calibration/buffer_distribution_entropy": 0.9914148505835181, "calibration/confidence_entropy": 0.39177509431275864, "calibration/coverage@0%": 0.016037841524134212, "calibration/coverage@1%": 0.016037841524134212, "calibration/coverage@10%": 0.15238588073982048, "calibration/coverage@15%": 0.23256510868099695, "calibration/coverage@20%": 0.2900405988770754, "calibration/coverage@25%": 0.332675402798644, "calibration/coverage@30%": 0.38508195917119303, "calibration/coverage@5%": 0.07093980230844793, "calibration/ece": 0.14636101944079075, "calibration/mean_confidence": 0.45591531154083825, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00146484375, "completions/max_length": 754.0, "completions/max_terminated_length": 754.0, "completions/mean_length": 201.73955078125, "completions/mean_terminated_length": 202.03497009277345, "completions/min_length": 0.0, "completions/min_terminated_length": 83.4, "epoch": 0.384, "grad_norm": 0.0009589268011040986, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 401071291.0, "reward": 0.9465407133102417, "reward_std": 0.08997839242219925, "rewards/accuracy_reward": 0.521875, "rewards/brier_reward": 0.7804174304008484, "rewards/confidence_uniqueness_reward": 0.9465129852294922, "rewards/format_reward": 0.99853515625, "rewards/frontier_coverage_0": 0.13628980442881583, "rewards/frontier_coverage_1": 0.13628980442881583, "rewards/frontier_coverage_10": 0.13628980442881583, "rewards/frontier_coverage_15": 0.13628980442881583, "rewards/frontier_coverage_20": 0.13628980442881583, "rewards/frontier_coverage_25": 0.13628980442881583, "rewards/frontier_coverage_5": 0.13628980442881583, "signal/accuracy_reward/centered_abs_mean": 0.10885009765625, "signal/accuracy_reward/group_std_mean": 0.15104981660842895, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054425048828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.054425048828125, "signal/advantage_abs_mean": 0.06531496718525887, "signal/advantage_pre_scale_abs_mean": 0.06531496718525887, "signal/advantage_pre_scale_std": 0.11181586831808091, "signal/advantage_std": 0.11181586831808091, "signal/brier_reward/centered_abs_mean": 0.16072153747081758, "signal/brier_reward/group_std_mean": 0.20803788006305696, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016072153858840466, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016072153858840466, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027308131381869317, "signal/confidence_uniqueness_reward/group_std_mean": 0.0376481682062149, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027308131102472544, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027308131102472544, "signal/format_reward/centered_abs_mean": 0.002789306640625, "signal/format_reward/group_std_mean": 0.0069411737378686665, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013946533203125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0013946533203125, "signal/frontier_coverage_0/centered_abs_mean": 0.2067556768655777, "signal/frontier_coverage_0/group_std_mean": 0.26987250447273253, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_1/centered_abs_mean": 0.2067556768655777, "signal/frontier_coverage_1/group_std_mean": 0.26987250447273253, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_10/centered_abs_mean": 0.2067556768655777, "signal/frontier_coverage_10/group_std_mean": 0.26987250447273253, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_15/centered_abs_mean": 0.2067556768655777, "signal/frontier_coverage_15/group_std_mean": 0.26987250447273253, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_20/centered_abs_mean": 0.2067556768655777, "signal/frontier_coverage_20/group_std_mean": 0.26987250447273253, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_25/centered_abs_mean": 0.2067556768655777, "signal/frontier_coverage_25/group_std_mean": 0.26987250447273253, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_5/centered_abs_mean": 0.2067556768655777, "signal/frontier_coverage_5/group_std_mean": 0.26987250447273253, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002956606028601527, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002956606028601527, "step": 120 }, { "calibration/aurc": 0.42876697440671235, "calibration/batch_distribution_entropy": 0.949518661468977, "calibration/buffer_distribution_entropy": 0.9917979609259282, "calibration/confidence_entropy": 0.42313672076600994, "calibration/coverage@0%": 0.005087316176470588, "calibration/coverage@1%": 0.005087316176470588, "calibration/coverage@10%": 0.005087316176470588, "calibration/coverage@15%": 0.019583333333333335, "calibration/coverage@20%": 0.023880208333333333, "calibration/coverage@25%": 0.048635982184375724, "calibration/coverage@30%": 0.22175975306130358, "calibration/coverage@5%": 0.005087316176470588, "calibration/ece": 0.2017280795755636, "calibration/mean_confidence": 0.48690404596470865, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00146484375, "completions/max_length": 497.6, "completions/max_terminated_length": 497.6, "completions/mean_length": 198.98349609375, "completions/mean_terminated_length": 199.2773651123047, "completions/min_length": 0.0, "completions/min_terminated_length": 92.2, "epoch": 0.4, "grad_norm": 0.0010078635532408953, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 418145330.0, "reward": 0.9326099157333374, "reward_std": 0.09904382973909379, "rewards/accuracy_reward": 0.49951171875, "rewards/brier_reward": 0.7598520636558532, "rewards/confidence_uniqueness_reward": 0.9458034992218017, "rewards/format_reward": 0.9984375, "rewards/frontier_coverage_0": 0.1308102782815695, "rewards/frontier_coverage_1": 0.1308102782815695, "rewards/frontier_coverage_10": 0.1308102782815695, "rewards/frontier_coverage_15": 0.1308102782815695, "rewards/frontier_coverage_20": 0.1308102782815695, "rewards/frontier_coverage_25": 0.12910652123391628, "rewards/frontier_coverage_5": 0.1308102782815695, "signal/accuracy_reward/centered_abs_mean": 0.128631591796875, "signal/accuracy_reward/group_std_mean": 0.17192812263965607, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0643157958984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0643157958984375, "signal/advantage_abs_mean": 0.07444410920143127, "signal/advantage_pre_scale_abs_mean": 0.07444410920143127, "signal/advantage_pre_scale_std": 0.12136494815349579, "signal/advantage_std": 0.12136494815349579, "signal/brier_reward/centered_abs_mean": 0.1757282793521881, "signal/brier_reward/group_std_mean": 0.22406087815761566, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017572828009724616, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017572828009724616, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027075739949941634, "signal/confidence_uniqueness_reward/group_std_mean": 0.03780955001711846, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027075740043073894, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027075740043073894, "signal/format_reward/centered_abs_mean": 0.0029541015625, "signal/format_reward/group_std_mean": 0.007679159566760063, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00147705078125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00147705078125, "signal/frontier_coverage_0/centered_abs_mean": 0.21751558780670166, "signal/frontier_coverage_0/group_std_mean": 0.28371057510375974, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_1/centered_abs_mean": 0.21751558780670166, "signal/frontier_coverage_1/group_std_mean": 0.28371057510375974, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_10/centered_abs_mean": 0.21751558780670166, "signal/frontier_coverage_10/group_std_mean": 0.28371057510375974, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_15/centered_abs_mean": 0.21751558780670166, "signal/frontier_coverage_15/group_std_mean": 0.28371057510375974, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_20/centered_abs_mean": 0.21751558780670166, "signal/frontier_coverage_20/group_std_mean": 0.28371057510375974, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_25/centered_abs_mean": 0.21508013904094697, "signal/frontier_coverage_25/group_std_mean": 0.2806576728820801, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003075646050274372, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003075646050274372, "signal/frontier_coverage_5/centered_abs_mean": 0.21751558780670166, "signal/frontier_coverage_5/group_std_mean": 0.28371057510375974, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031104729045182467, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031104729045182467, "step": 125 }, { "calibration/aurc": 0.31364459658875965, "calibration/batch_distribution_entropy": 0.9483515066753176, "calibration/buffer_distribution_entropy": 0.9914676447275372, "calibration/confidence_entropy": 0.4294093524780046, "calibration/coverage@0%": 0.004694406925343812, "calibration/coverage@1%": 0.004694406925343812, "calibration/coverage@10%": 0.03828815692534381, "calibration/coverage@15%": 0.0996162819253438, "calibration/coverage@20%": 0.18711628192534382, "calibration/coverage@25%": 0.3008249938605108, "calibration/coverage@30%": 0.40244044695481335, "calibration/coverage@5%": 0.004694406925343812, "calibration/ece": 0.130821016745361, "calibration/mean_confidence": 0.5221690310011222, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 717.4, "completions/max_terminated_length": 717.4, "completions/mean_length": 199.20458984375, "completions/mean_terminated_length": 199.45704650878906, "completions/min_length": 0.0, "completions/min_terminated_length": 88.4, "epoch": 0.416, "grad_norm": 0.0010729380883276463, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 435066369.0, "reward": 0.9395390391349793, "reward_std": 0.09434663653373718, "rewards/accuracy_reward": 0.51064453125, "rewards/brier_reward": 0.7705135226249695, "rewards/confidence_uniqueness_reward": 0.9472472310066223, "rewards/format_reward": 0.9986328125, "rewards/frontier_coverage_0": 0.13238348066806793, "rewards/frontier_coverage_1": 0.13238348066806793, "rewards/frontier_coverage_10": 0.13238348066806793, "rewards/frontier_coverage_15": 0.13238348066806793, "rewards/frontier_coverage_20": 0.13238348066806793, "rewards/frontier_coverage_25": 0.12348232418298721, "rewards/frontier_coverage_5": 0.13238348066806793, "signal/accuracy_reward/centered_abs_mean": 0.124066162109375, "signal/accuracy_reward/group_std_mean": 0.15881072282791137, "signal/accuracy_reward/group_zero_std_frac": 0.5625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0620330810546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0620330810546875, "signal/advantage_abs_mean": 0.0726077377796173, "signal/advantage_pre_scale_abs_mean": 0.0726077377796173, "signal/advantage_pre_scale_std": 0.11807905584573745, "signal/advantage_std": 0.11807905584573745, "signal/brier_reward/centered_abs_mean": 0.17476985454559327, "signal/brier_reward/group_std_mean": 0.22144999206066132, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01747698597609997, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01747698597609997, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026467961445450782, "signal/confidence_uniqueness_reward/group_std_mean": 0.03703402951359749, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002646796219050884, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002646796219050884, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_std_mean": 0.007733980286866426, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_coverage_0/centered_abs_mean": 0.22557145953178406, "signal/frontier_coverage_0/group_std_mean": 0.287382698059082, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_1/centered_abs_mean": 0.22557145953178406, "signal/frontier_coverage_1/group_std_mean": 0.287382698059082, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_10/centered_abs_mean": 0.22557145953178406, "signal/frontier_coverage_10/group_std_mean": 0.287382698059082, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_15/centered_abs_mean": 0.22557145953178406, "signal/frontier_coverage_15/group_std_mean": 0.287382698059082, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_20/centered_abs_mean": 0.22557145953178406, "signal/frontier_coverage_20/group_std_mean": 0.287382698059082, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_25/centered_abs_mean": 0.2027723640203476, "signal/frontier_coverage_25/group_std_mean": 0.25904818475246427, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002899644710123539, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002899644710123539, "signal/frontier_coverage_5/centered_abs_mean": 0.22557145953178406, "signal/frontier_coverage_5/group_std_mean": 0.287382698059082, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032256717327982186, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032256717327982186, "step": 130 }, { "calibration/aurc": 0.27631575926441354, "calibration/batch_distribution_entropy": 0.9411343032566624, "calibration/buffer_distribution_entropy": 0.9906036163305982, "calibration/confidence_entropy": 0.4103170827497781, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.107421875, "calibration/coverage@15%": 0.148828125, "calibration/coverage@20%": 0.3464935661764706, "calibration/coverage@25%": 0.42737745098039215, "calibration/coverage@30%": 0.5640977328431372, "calibration/coverage@5%": 0.03203125, "calibration/ece": 0.12333647071900052, "calibration/mean_confidence": 0.5359197657645645, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 630.6, "completions/max_terminated_length": 630.6, "completions/mean_length": 193.8431640625, "completions/mean_terminated_length": 193.9956481933594, "completions/min_length": 18.0, "completions/min_terminated_length": 89.2, "epoch": 0.432, "grad_norm": 0.0013667960884049535, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 452065659.0, "reward": 0.9584846138954163, "reward_std": 0.08736461549997329, "rewards/accuracy_reward": 0.54599609375, "rewards/brier_reward": 0.7880683302879333, "rewards/confidence_uniqueness_reward": 0.9461393594741822, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.127962838858366, "rewards/frontier_coverage_1": 0.127962838858366, "rewards/frontier_coverage_10": 0.127962838858366, "rewards/frontier_coverage_15": 0.127962838858366, "rewards/frontier_coverage_20": 0.127962838858366, "rewards/frontier_coverage_25": 0.10671568959951401, "rewards/frontier_coverage_5": 0.127962838858366, "signal/accuracy_reward/centered_abs_mean": 0.118438720703125, "signal/accuracy_reward/group_std_mean": 0.15208634585142136, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0592193603515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0592193603515625, "signal/advantage_abs_mean": 0.06664500907063484, "signal/advantage_pre_scale_abs_mean": 0.06664500907063484, "signal/advantage_pre_scale_std": 0.11240910291671753, "signal/advantage_std": 0.11240910291671753, "signal/brier_reward/centered_abs_mean": 0.1604565739631653, "signal/brier_reward/group_std_mean": 0.20421489775180818, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01604565791785717, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01604565791785717, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027118064090609552, "signal/confidence_uniqueness_reward/group_std_mean": 0.037455400079488756, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027118063997477295, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027118063997477295, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.0049718443769961596, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_coverage_0/centered_abs_mean": 0.21060441732406615, "signal/frontier_coverage_0/group_std_mean": 0.269395238161087, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_1/centered_abs_mean": 0.21060441732406615, "signal/frontier_coverage_1/group_std_mean": 0.269395238161087, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_10/centered_abs_mean": 0.21060441732406615, "signal/frontier_coverage_10/group_std_mean": 0.269395238161087, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_15/centered_abs_mean": 0.21060441732406615, "signal/frontier_coverage_15/group_std_mean": 0.269395238161087, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_20/centered_abs_mean": 0.21060441732406615, "signal/frontier_coverage_20/group_std_mean": 0.269395238161087, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_25/centered_abs_mean": 0.16856006383895875, "signal/frontier_coverage_25/group_std_mean": 0.21680730879306792, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002410408854484558, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002410408854484558, "signal/frontier_coverage_5/centered_abs_mean": 0.21060441732406615, "signal/frontier_coverage_5/group_std_mean": 0.269395238161087, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030116431415081026, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030116431415081026, "step": 135 }, { "calibration/aurc": 0.28610566413293276, "calibration/batch_distribution_entropy": 0.941972818254718, "calibration/buffer_distribution_entropy": 0.9895541282721396, "calibration/confidence_entropy": 0.4368501676028683, "calibration/coverage@0%": 0.013307240704500978, "calibration/coverage@1%": 0.013307240704500978, "calibration/coverage@10%": 0.07397260273972603, "calibration/coverage@15%": 0.17565970523483365, "calibration/coverage@20%": 0.24409781678082193, "calibration/coverage@25%": 0.35203797700587086, "calibration/coverage@30%": 0.501732968444227, "calibration/coverage@5%": 0.05636007827788649, "calibration/ece": 0.148974853332463, "calibration/mean_confidence": 0.5850199136094146, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 609.4, "completions/max_terminated_length": 609.4, "completions/mean_length": 197.78876953125, "completions/mean_terminated_length": 197.92320861816407, "completions/min_length": 18.0, "completions/min_terminated_length": 89.6, "epoch": 0.448, "grad_norm": 0.0012304031988605857, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 469043816.0, "reward": 0.9455674409866333, "reward_std": 0.08576287478208541, "rewards/accuracy_reward": 0.51591796875, "rewards/brier_reward": 0.7885148882865906, "rewards/confidence_uniqueness_reward": 0.9510594606399536, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.145331272482872, "rewards/frontier_coverage_1": 0.145331272482872, "rewards/frontier_coverage_10": 0.145331272482872, "rewards/frontier_coverage_15": 0.145331272482872, "rewards/frontier_coverage_20": 0.1450465127825737, "rewards/frontier_coverage_25": 0.11706055402755737, "rewards/frontier_coverage_5": 0.145331272482872, "signal/accuracy_reward/centered_abs_mean": 0.109393310546875, "signal/accuracy_reward/group_std_mean": 0.1406890869140625, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0546966552734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0546966552734375, "signal/advantage_abs_mean": 0.06536918431520462, "signal/advantage_pre_scale_abs_mean": 0.06536918431520462, "signal/advantage_pre_scale_std": 0.11072720885276795, "signal/advantage_std": 0.11072720885276795, "signal/brier_reward/centered_abs_mean": 0.15777938365936278, "signal/brier_reward/group_std_mean": 0.20213670134544373, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01577793899923563, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01577793899923563, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0231874518096447, "signal/confidence_uniqueness_reward/group_std_mean": 0.0322634294629097, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023187451995909216, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023187451995909216, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271493777632, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.2044217973947525, "signal/frontier_coverage_0/group_std_mean": 0.2626798987388611, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_1/centered_abs_mean": 0.2044217973947525, "signal/frontier_coverage_1/group_std_mean": 0.2626798987388611, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_10/centered_abs_mean": 0.2044217973947525, "signal/frontier_coverage_10/group_std_mean": 0.2626798987388611, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_15/centered_abs_mean": 0.2044217973947525, "signal/frontier_coverage_15/group_std_mean": 0.2626798987388611, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_20/centered_abs_mean": 0.2033557653427124, "signal/frontier_coverage_20/group_std_mean": 0.2613858848810196, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029079874977469443, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029079874977469443, "signal/frontier_coverage_25/centered_abs_mean": 0.15121191143989562, "signal/frontier_coverage_25/group_std_mean": 0.1960006058216095, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021623303182423115, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021623303182423115, "signal/frontier_coverage_5/centered_abs_mean": 0.2044217973947525, "signal/frontier_coverage_5/group_std_mean": 0.2626798987388611, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029232318513095377, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029232318513095377, "step": 140 }, { "calibration/aurc": 0.41597604928666837, "calibration/batch_distribution_entropy": 0.9587108118403496, "calibration/buffer_distribution_entropy": 0.9883232106890395, "calibration/confidence_entropy": 0.4468210218563723, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.005078125, "calibration/coverage@15%": 0.00546875, "calibration/coverage@20%": 0.059765625, "calibration/coverage@25%": 0.09765625, "calibration/coverage@30%": 0.26015625, "calibration/coverage@5%": 0.005078125, "calibration/ece": 0.15567187863866366, "calibration/mean_confidence": 0.5199681562651445, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 530.8, "completions/max_terminated_length": 530.8, "completions/mean_length": 198.5326171875, "completions/mean_terminated_length": 198.66876831054688, "completions/min_length": 35.6, "completions/min_terminated_length": 88.8, "epoch": 0.464, "grad_norm": 0.0010460478952154517, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 486247606.0, "reward": 0.9150034546852112, "reward_std": 0.08174609690904618, "rewards/accuracy_reward": 0.4603515625, "rewards/brier_reward": 0.7551176190376282, "rewards/confidence_uniqueness_reward": 0.949243712425232, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.15224866196513176, "rewards/frontier_coverage_1": 0.15224866196513176, "rewards/frontier_coverage_10": 0.15224866196513176, "rewards/frontier_coverage_15": 0.15224866196513176, "rewards/frontier_coverage_20": 0.15224866196513176, "rewards/frontier_coverage_25": 0.12363973185420037, "rewards/frontier_coverage_5": 0.15224866196513176, "signal/accuracy_reward/centered_abs_mean": 0.0911376953125, "signal/accuracy_reward/group_std_mean": 0.12455651462078095, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04556884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04556884765625, "signal/advantage_abs_mean": 0.06125259175896645, "signal/advantage_pre_scale_abs_mean": 0.06125259175896645, "signal/advantage_pre_scale_std": 0.10643679648637772, "signal/advantage_std": 0.10643679648637772, "signal/brier_reward/centered_abs_mean": 0.16171522736549376, "signal/brier_reward/group_std_mean": 0.20533936619758605, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016171522811055182, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016171522811055182, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02406933158636093, "signal/confidence_uniqueness_reward/group_std_mean": 0.03289683237671852, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024069331819191577, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024069331819191577, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844470128417, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_coverage_0/centered_abs_mean": 0.1879375845193863, "signal/frontier_coverage_0/group_std_mean": 0.2440842032432556, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_1/centered_abs_mean": 0.1879375845193863, "signal/frontier_coverage_1/group_std_mean": 0.2440842032432556, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_10/centered_abs_mean": 0.1879375845193863, "signal/frontier_coverage_10/group_std_mean": 0.2440842032432556, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_15/centered_abs_mean": 0.1879375845193863, "signal/frontier_coverage_15/group_std_mean": 0.2440842032432556, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_20/centered_abs_mean": 0.1879375845193863, "signal/frontier_coverage_20/group_std_mean": 0.2440842032432556, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_25/centered_abs_mean": 0.14551771879196168, "signal/frontier_coverage_25/group_std_mean": 0.19012450873851777, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020809032954275607, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020809032954275607, "signal/frontier_coverage_5/centered_abs_mean": 0.1879375845193863, "signal/frontier_coverage_5/group_std_mean": 0.2440842032432556, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002687507402151823, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002687507402151823, "step": 145 }, { "calibration/aurc": 0.2966352076482409, "calibration/batch_distribution_entropy": 0.9417444696033105, "calibration/buffer_distribution_entropy": 0.9854769117447294, "calibration/confidence_entropy": 0.4245164194763583, "calibration/coverage@0%": 0.0019561827299412913, "calibration/coverage@1%": 0.0019561827299412913, "calibration/coverage@10%": 0.04687805772994129, "calibration/coverage@15%": 0.06523743272994129, "calibration/coverage@20%": 0.26094208659491197, "calibration/coverage@25%": 0.44967205846379643, "calibration/coverage@30%": 0.5446397994129158, "calibration/coverage@5%": 0.0019561827299412913, "calibration/ece": 0.14131419788242233, "calibration/mean_confidence": 0.5137820425893411, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 758.8, "completions/max_terminated_length": 758.8, "completions/mean_length": 195.35341796875, "completions/mean_terminated_length": 195.5635498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 89.8, "epoch": 0.48, "grad_norm": 0.0009436359978280962, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 503296057.0, "reward": 0.942992627620697, "reward_std": 0.08964750766754151, "rewards/accuracy_reward": 0.5171875, "rewards/brier_reward": 0.771629810333252, "rewards/confidence_uniqueness_reward": 0.9447030186653137, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.13579091578722, "rewards/frontier_coverage_1": 0.13579091578722, "rewards/frontier_coverage_10": 0.13579091578722, "rewards/frontier_coverage_15": 0.13579091578722, "rewards/frontier_coverage_20": 0.13579091578722, "rewards/frontier_coverage_25": 0.1155124381184578, "rewards/frontier_coverage_5": 0.13579091578722, "signal/accuracy_reward/centered_abs_mean": 0.1217041015625, "signal/accuracy_reward/group_std_mean": 0.16184872686862944, "signal/accuracy_reward/group_zero_std_frac": 0.5375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06085205078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06085205078125, "signal/advantage_abs_mean": 0.06639757454395294, "signal/advantage_pre_scale_abs_mean": 0.06639757454395294, "signal/advantage_pre_scale_std": 0.11358063519001008, "signal/advantage_std": 0.11358063519001008, "signal/brier_reward/centered_abs_mean": 0.15432437360286713, "signal/brier_reward/group_std_mean": 0.20063064694404603, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01543243769556284, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01543243769556284, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026983362436294556, "signal/confidence_uniqueness_reward/group_std_mean": 0.03629298433661461, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026983361691236495, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026983361691236495, "signal/format_reward/centered_abs_mean": 0.002069091796875, "signal/format_reward/group_std_mean": 0.005740390252321958, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375, "signal/frontier_coverage_0/centered_abs_mean": 0.20254981517791748, "signal/frontier_coverage_0/group_std_mean": 0.2641178369522095, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_1/centered_abs_mean": 0.20254981517791748, "signal/frontier_coverage_1/group_std_mean": 0.2641178369522095, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_10/centered_abs_mean": 0.20254981517791748, "signal/frontier_coverage_10/group_std_mean": 0.2641178369522095, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_15/centered_abs_mean": 0.20254981517791748, "signal/frontier_coverage_15/group_std_mean": 0.2641178369522095, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_20/centered_abs_mean": 0.20254981517791748, "signal/frontier_coverage_20/group_std_mean": 0.2641178369522095, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_25/centered_abs_mean": 0.15858907699584962, "signal/frontier_coverage_25/group_std_mean": 0.20831416547298431, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022678238339722155, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022678238339722155, "signal/frontier_coverage_5/centered_abs_mean": 0.20254981517791748, "signal/frontier_coverage_5/group_std_mean": 0.2641178369522095, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028964622411876916, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028964622411876916, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.4831069061903782, "eval_calibration/batch_distribution_entropy": 0.8847234666383597, "eval_calibration/buffer_distribution_entropy": 0.9830679215380574, "eval_calibration/confidence_entropy": 0.4128065002360819, "eval_calibration/coverage@0%": 0.078125, "eval_calibration/coverage@1%": 0.078125, "eval_calibration/coverage@10%": 0.078125, "eval_calibration/coverage@15%": 0.0859375, "eval_calibration/coverage@20%": 0.125, "eval_calibration/coverage@25%": 0.2109375, "eval_calibration/coverage@30%": 0.2265625, "eval_calibration/coverage@5%": 0.078125, "eval_calibration/ece": 0.2611121955578006, "eval_calibration/mean_confidence": 0.47455383892968606, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 403.0, "eval_completions/max_terminated_length": 403.0, "eval_completions/mean_length": 195.1864891052246, "eval_completions/mean_terminated_length": 195.1864891052246, "eval_completions/min_length": 101.0, "eval_completions/min_terminated_length": 101.0, "eval_loss": 0.0, "eval_num_tokens": 503296057.0, "eval_reward": 0.8924962729215622, "eval_reward_std": 0.2204424850642681, "eval_rewards/accuracy_reward": 0.408203125, "eval_rewards/brier_reward": 0.7837338745594025, "eval_rewards/confidence_uniqueness_reward": 0.8876953125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.21979742124676704, "eval_rewards/frontier_coverage_1": 0.21979742124676704, "eval_rewards/frontier_coverage_10": 0.21979742124676704, "eval_rewards/frontier_coverage_15": 0.21979742124676704, "eval_rewards/frontier_coverage_20": 0.21979742124676704, "eval_rewards/frontier_coverage_25": 0.1673554927110672, "eval_rewards/frontier_coverage_5": 0.21979742124676704, "eval_runtime": 21.217, "eval_samples_per_second": 23.566, "eval_signal/accuracy_reward/centered_abs_mean": 0.4635009765625, "eval_signal/accuracy_reward/group_std_mean": 0.48869405686855316, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23175048828125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23175048828125, "eval_signal/advantage_abs_mean": 0.20180771127343178, "eval_signal/advantage_pre_scale_abs_mean": 0.20180771127343178, "eval_signal/advantage_pre_scale_std": 0.21814486756920815, "eval_signal/advantage_std": 0.21814486756920815, "eval_signal/brier_reward/centered_abs_mean": 0.22955559566617012, "eval_signal/brier_reward/group_std_mean": 0.2795948311686516, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022955560591071844, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022955560591071844, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.054779052734375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0656774491071701, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054779056226834655, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054779056226834655, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3919490575790405, "eval_signal/frontier_coverage_0/group_std_mean": 0.4804074615240097, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3919490575790405, "eval_signal/frontier_coverage_1/group_std_mean": 0.4804074615240097, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3919490575790405, "eval_signal/frontier_coverage_10/group_std_mean": 0.4804074615240097, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3919490575790405, "eval_signal/frontier_coverage_15/group_std_mean": 0.4804074615240097, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3919490575790405, "eval_signal/frontier_coverage_20/group_std_mean": 0.4804074615240097, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.30262789130210876, "eval_signal/frontier_coverage_25/group_std_mean": 0.3734619617462158, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004327578702941537, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004327578702941537, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3919490575790405, "eval_signal/frontier_coverage_5/group_std_mean": 0.4804074615240097, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005604871432296932, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005604871432296932, "eval_steps_per_second": 0.189, "step": 150 }, { "calibration/aurc": 0.3799576377700869, "calibration/batch_distribution_entropy": 0.942893601281817, "calibration/buffer_distribution_entropy": 0.9810508104668042, "calibration/confidence_entropy": 0.416781882516017, "calibration/coverage@0%": 0.005860139432485323, "calibration/coverage@1%": 0.005860139432485323, "calibration/coverage@10%": 0.11211013943248531, "calibration/coverage@15%": 0.1527810053816047, "calibration/coverage@20%": 0.18170101516634052, "calibration/coverage@25%": 0.21533528008806263, "calibration/coverage@30%": 0.289664180312536, "calibration/coverage@5%": 0.04961013943248532, "calibration/ece": 0.1558927200859373, "calibration/mean_confidence": 0.5103141788755559, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 553.0, "completions/max_terminated_length": 553.0, "completions/mean_length": 194.48505859375, "completions/mean_terminated_length": 194.6558349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 90.2, "epoch": 0.496, "grad_norm": 0.0008825138211250305, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 520595424.0, "reward": 0.9555532336235046, "reward_std": 0.08352845162153244, "rewards/accuracy_reward": 0.54580078125, "rewards/brier_reward": 0.7727875947952271, "rewards/confidence_uniqueness_reward": 0.9481261730194092, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.11409009248018265, "rewards/frontier_coverage_1": 0.11409009248018265, "rewards/frontier_coverage_10": 0.11409009248018265, "rewards/frontier_coverage_15": 0.11409009248018265, "rewards/frontier_coverage_20": 0.11409009248018265, "rewards/frontier_coverage_25": 0.08816799521446228, "rewards/frontier_coverage_5": 0.11409009248018265, "signal/accuracy_reward/centered_abs_mean": 0.103399658203125, "signal/accuracy_reward/group_std_mean": 0.1389443188905716, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0516998291015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0516998291015625, "signal/advantage_abs_mean": 0.06242571994662285, "signal/advantage_pre_scale_abs_mean": 0.06242571994662285, "signal/advantage_pre_scale_std": 0.10876623839139939, "signal/advantage_std": 0.10876623839139939, "signal/brier_reward/centered_abs_mean": 0.1525803655385971, "signal/brier_reward/group_std_mean": 0.19565546214580537, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01525803655385971, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01525803655385971, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024064848199486732, "signal/confidence_uniqueness_reward/group_std_mean": 0.0327242337167263, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024064849596470593, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024064849596470593, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.18754335045814513, "signal/frontier_coverage_0/group_std_mean": 0.24416738152503967, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_1/centered_abs_mean": 0.18754335045814513, "signal/frontier_coverage_1/group_std_mean": 0.24416738152503967, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_10/centered_abs_mean": 0.18754335045814513, "signal/frontier_coverage_10/group_std_mean": 0.24416738152503967, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_15/centered_abs_mean": 0.18754335045814513, "signal/frontier_coverage_15/group_std_mean": 0.24416738152503967, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_20/centered_abs_mean": 0.18754335045814513, "signal/frontier_coverage_20/group_std_mean": 0.24416738152503967, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_25/centered_abs_mean": 0.14260709285736084, "signal/frontier_coverage_25/group_std_mean": 0.18698225021362305, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002039281511679292, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002039281511679292, "signal/frontier_coverage_5/centered_abs_mean": 0.18754335045814513, "signal/frontier_coverage_5/group_std_mean": 0.24416738152503967, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002681869873777032, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002681869873777032, "step": 155 }, { "calibration/aurc": 0.3433580891081784, "calibration/batch_distribution_entropy": 0.9489904033296426, "calibration/buffer_distribution_entropy": 0.9766839241875452, "calibration/confidence_entropy": 0.43796031112504796, "calibration/coverage@0%": 0.01333103404032846, "calibration/coverage@1%": 0.01333103404032846, "calibration/coverage@10%": 0.173933056099152, "calibration/coverage@15%": 0.2659734972756226, "calibration/coverage@20%": 0.34586997513046314, "calibration/coverage@25%": 0.39675685171328806, "calibration/coverage@30%": 0.4453351961503779, "calibration/coverage@5%": 0.09614353404032847, "calibration/ece": 0.16492687563132064, "calibration/mean_confidence": 0.512453271287414, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 571.0, "completions/max_terminated_length": 571.0, "completions/mean_length": 186.35107421875, "completions/mean_terminated_length": 186.5321838378906, "completions/min_length": 0.0, "completions/min_terminated_length": 92.6, "epoch": 0.512, "grad_norm": 0.0010090820724144578, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 537649323.0, "reward": 0.9542897462844848, "reward_std": 0.0837359830737114, "rewards/accuracy_reward": 0.5373046875, "rewards/brier_reward": 0.7862473368644715, "rewards/confidence_uniqueness_reward": 0.9487914681434632, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.1308758407831192, "rewards/frontier_coverage_1": 0.1308758407831192, "rewards/frontier_coverage_10": 0.1308758407831192, "rewards/frontier_coverage_15": 0.1308758407831192, "rewards/frontier_coverage_20": 0.1308758407831192, "rewards/frontier_coverage_25": 0.09738899916410446, "rewards/frontier_coverage_5": 0.1308758407831192, "signal/accuracy_reward/centered_abs_mean": 0.10369873046875, "signal/accuracy_reward/group_std_mean": 0.138237564265728, "signal/accuracy_reward/group_zero_std_frac": 0.603125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051849365234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051849365234375, "signal/advantage_abs_mean": 0.06327899843454361, "signal/advantage_pre_scale_abs_mean": 0.06327899843454361, "signal/advantage_pre_scale_std": 0.11133654713630677, "signal/advantage_std": 0.11133654713630677, "signal/brier_reward/centered_abs_mean": 0.1488574415445328, "signal/brier_reward/group_std_mean": 0.1922387957572937, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014885743707418441, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014885743707418441, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023807717114686967, "signal/confidence_uniqueness_reward/group_std_mean": 0.03172732964158058, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023807717021554708, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023807717021554708, "signal/format_reward/centered_abs_mean": 0.00186767578125, "signal/format_reward/group_std_mean": 0.00485165468417108, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000933837890625, "signal/frontier_coverage_0/centered_abs_mean": 0.1794394850730896, "signal/frontier_coverage_0/group_std_mean": 0.23912697434425353, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_1/centered_abs_mean": 0.1794394850730896, "signal/frontier_coverage_1/group_std_mean": 0.23912697434425353, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_10/centered_abs_mean": 0.1794394850730896, "signal/frontier_coverage_10/group_std_mean": 0.23912697434425353, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_15/centered_abs_mean": 0.1794394850730896, "signal/frontier_coverage_15/group_std_mean": 0.23912697434425353, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_20/centered_abs_mean": 0.1794394850730896, "signal/frontier_coverage_20/group_std_mean": 0.23912697434425353, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_25/centered_abs_mean": 0.12620090395212175, "signal/frontier_coverage_25/group_std_mean": 0.16993048191070556, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018046729266643525, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018046729266643525, "signal/frontier_coverage_5/centered_abs_mean": 0.1794394850730896, "signal/frontier_coverage_5/group_std_mean": 0.23912697434425353, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002565984660759568, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002565984660759568, "step": 160 }, { "calibration/aurc": 0.20960424511882794, "calibration/batch_distribution_entropy": 0.937826372264357, "calibration/buffer_distribution_entropy": 0.9732044406089282, "calibration/confidence_entropy": 0.4191032450695021, "calibration/coverage@0%": 0.017591884784735813, "calibration/coverage@1%": 0.017591884784735813, "calibration/coverage@10%": 0.2767230308219178, "calibration/coverage@15%": 0.45409506482387474, "calibration/coverage@20%": 0.5728779354207436, "calibration/coverage@25%": 0.6584622676125245, "calibration/coverage@30%": 0.7624204990215264, "calibration/coverage@5%": 0.0547142551369863, "calibration/ece": 0.11670693359510136, "calibration/mean_confidence": 0.5251318956402845, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 606.6, "completions/max_terminated_length": 606.6, "completions/mean_length": 180.396875, "completions/mean_terminated_length": 180.43230285644532, "completions/min_length": 54.2, "completions/min_terminated_length": 89.0, "epoch": 0.528, "grad_norm": 0.0013867069501429796, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 554526123.0, "reward": 0.9590974926948548, "reward_std": 0.07940471768379212, "rewards/accuracy_reward": 0.5408203125, "rewards/brier_reward": 0.799199378490448, "rewards/confidence_uniqueness_reward": 0.9451055526733398, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.14895989149808883, "rewards/frontier_coverage_1": 0.14895989149808883, "rewards/frontier_coverage_10": 0.14895989149808883, "rewards/frontier_coverage_15": 0.14895989149808883, "rewards/frontier_coverage_20": 0.14895989149808883, "rewards/frontier_coverage_25": 0.11005051881074905, "rewards/frontier_coverage_5": 0.14895989149808883, "signal/accuracy_reward/centered_abs_mean": 0.1129638671875, "signal/accuracy_reward/group_std_mean": 0.14522374868392945, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05648193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05648193359375, "signal/advantage_abs_mean": 0.06193904280662536, "signal/advantage_pre_scale_abs_mean": 0.06193904280662536, "signal/advantage_pre_scale_std": 0.1081055223941803, "signal/advantage_std": 0.1081055223941803, "signal/brier_reward/centered_abs_mean": 0.1428891509771347, "signal/brier_reward/group_std_mean": 0.180334734916687, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014288916438817977, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014288916438817977, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026091757416725158, "signal/confidence_uniqueness_reward/group_std_mean": 0.03331942185759544, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002609175816178322, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002609175816178322, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.18938855528831483, "signal/frontier_coverage_0/group_std_mean": 0.2450707495212555, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_1/centered_abs_mean": 0.18938855528831483, "signal/frontier_coverage_1/group_std_mean": 0.2450707495212555, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_10/centered_abs_mean": 0.18938855528831483, "signal/frontier_coverage_10/group_std_mean": 0.2450707495212555, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_15/centered_abs_mean": 0.18938855528831483, "signal/frontier_coverage_15/group_std_mean": 0.2450707495212555, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_20/centered_abs_mean": 0.18938855528831483, "signal/frontier_coverage_20/group_std_mean": 0.2450707495212555, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_25/centered_abs_mean": 0.1290591835975647, "signal/frontier_coverage_25/group_std_mean": 0.16820741891860963, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018455463228747248, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018455463228747248, "signal/frontier_coverage_5/centered_abs_mean": 0.18938855528831483, "signal/frontier_coverage_5/group_std_mean": 0.2450707495212555, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00270825638435781, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00270825638435781, "step": 165 }, { "calibration/aurc": 0.22657361490672714, "calibration/batch_distribution_entropy": 0.9040110531831956, "calibration/buffer_distribution_entropy": 0.9696198518608401, "calibration/confidence_entropy": 0.4071839362196341, "calibration/coverage@0%": 0.028537793542074363, "calibration/coverage@1%": 0.028537793542074363, "calibration/coverage@10%": 0.1836938906555773, "calibration/coverage@15%": 0.30716579011741685, "calibration/coverage@20%": 0.5076190985812132, "calibration/coverage@25%": 0.6631237769080235, "calibration/coverage@30%": 0.7479207436399217, "calibration/coverage@5%": 0.06372462084148728, "calibration/ece": 0.09039096955456043, "calibration/mean_confidence": 0.5459185071689842, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 513.6, "completions/max_terminated_length": 513.6, "completions/mean_length": 174.13134765625, "completions/mean_terminated_length": 174.1650604248047, "completions/min_length": 64.6, "completions/min_terminated_length": 82.2, "epoch": 0.544, "grad_norm": 0.0009238318889401853, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 571472812.0, "reward": 0.9676682591438294, "reward_std": 0.08454482406377792, "rewards/accuracy_reward": 0.56689453125, "rewards/brier_reward": 0.7879523038864136, "rewards/confidence_uniqueness_reward": 0.9500796794891357, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.10987547188997268, "rewards/frontier_coverage_1": 0.10987547188997268, "rewards/frontier_coverage_10": 0.10987547188997268, "rewards/frontier_coverage_15": 0.10987547188997268, "rewards/frontier_coverage_20": 0.10958926826715469, "rewards/frontier_coverage_25": 0.07979481071233749, "rewards/frontier_coverage_5": 0.10987547188997268, "signal/accuracy_reward/centered_abs_mean": 0.115875244140625, "signal/accuracy_reward/group_std_mean": 0.157163542509079, "signal/accuracy_reward/group_zero_std_frac": 0.5375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0579376220703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0579376220703125, "signal/advantage_abs_mean": 0.06283592209219932, "signal/advantage_pre_scale_abs_mean": 0.06283592209219932, "signal/advantage_pre_scale_std": 0.11006280481815338, "signal/advantage_std": 0.11006280481815338, "signal/brier_reward/centered_abs_mean": 0.1496346950531006, "signal/brier_reward/group_std_mean": 0.19032892882823943, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014963469840586186, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014963469840586186, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023456166312098504, "signal/confidence_uniqueness_reward/group_std_mean": 0.03012530505657196, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002345616649836302, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002345616649836302, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.18942977488040924, "signal/frontier_coverage_0/group_std_mean": 0.2492722123861313, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_1/centered_abs_mean": 0.18942977488040924, "signal/frontier_coverage_1/group_std_mean": 0.2492722123861313, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_10/centered_abs_mean": 0.18942977488040924, "signal/frontier_coverage_10/group_std_mean": 0.2492722123861313, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_15/centered_abs_mean": 0.18942977488040924, "signal/frontier_coverage_15/group_std_mean": 0.2492722123861313, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_20/centered_abs_mean": 0.18900564014911653, "signal/frontier_coverage_20/group_std_mean": 0.24872445166110993, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027027806732803582, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027027806732803582, "signal/frontier_coverage_25/centered_abs_mean": 0.1239845871925354, "signal/frontier_coverage_25/group_std_mean": 0.1644377052783966, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017729795770719647, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017729795770719647, "signal/frontier_coverage_5/centered_abs_mean": 0.18942977488040924, "signal/frontier_coverage_5/group_std_mean": 0.2492722123861313, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027088457718491554, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027088457718491554, "step": 170 }, { "calibration/aurc": 0.2587221330992257, "calibration/batch_distribution_entropy": 0.9351214394358193, "calibration/buffer_distribution_entropy": 0.9666515299239423, "calibration/confidence_entropy": 0.43241105274145475, "calibration/coverage@0%": 0.065234375, "calibration/coverage@1%": 0.10078125, "calibration/coverage@10%": 0.231640625, "calibration/coverage@15%": 0.298828125, "calibration/coverage@20%": 0.360546875, "calibration/coverage@25%": 0.492578125, "calibration/coverage@30%": 0.56875, "calibration/coverage@5%": 0.19453125, "calibration/ece": 0.12503712805676448, "calibration/mean_confidence": 0.5194717080207317, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 501.6, "completions/max_terminated_length": 501.6, "completions/mean_length": 171.4548828125, "completions/mean_terminated_length": 171.48790588378907, "completions/min_length": 52.0, "completions/min_terminated_length": 86.4, "epoch": 0.56, "grad_norm": 0.0008401142549701035, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 588049918.0, "reward": 0.9541451930999756, "reward_std": 0.07544240057468414, "rewards/accuracy_reward": 0.5296875, "rewards/brier_reward": 0.8010993957519531, "rewards/confidence_uniqueness_reward": 0.9509715795516968, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.14851057529449463, "rewards/frontier_coverage_1": 0.14851057529449463, "rewards/frontier_coverage_10": 0.14851057529449463, "rewards/frontier_coverage_15": 0.14851057529449463, "rewards/frontier_coverage_20": 0.14732412695884706, "rewards/frontier_coverage_25": 0.10257082134485244, "rewards/frontier_coverage_5": 0.14851057529449463, "signal/accuracy_reward/centered_abs_mean": 0.0928955078125, "signal/accuracy_reward/group_std_mean": 0.12647614181041716, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04644775390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04644775390625, "signal/advantage_abs_mean": 0.05650952383875847, "signal/advantage_pre_scale_abs_mean": 0.05650952383875847, "signal/advantage_pre_scale_std": 0.10081221014261246, "signal/advantage_std": 0.10081221014261246, "signal/brier_reward/centered_abs_mean": 0.14023579359054567, "signal/brier_reward/group_std_mean": 0.1808777332305908, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014023579470813275, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014023579470813275, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022420838847756384, "signal/confidence_uniqueness_reward/group_std_mean": 0.028805967792868613, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022420838475227358, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022420838475227358, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.1793442577123642, "signal/frontier_coverage_0/group_std_mean": 0.23208971619606017, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_1/centered_abs_mean": 0.1793442577123642, "signal/frontier_coverage_1/group_std_mean": 0.23208971619606017, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_10/centered_abs_mean": 0.1793442577123642, "signal/frontier_coverage_10/group_std_mean": 0.23208971619606017, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_15/centered_abs_mean": 0.1793442577123642, "signal/frontier_coverage_15/group_std_mean": 0.23208971619606017, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_20/centered_abs_mean": 0.17742233574390412, "signal/frontier_coverage_20/group_std_mean": 0.22966001629829408, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025371393654495477, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025371393654495477, "signal/frontier_coverage_25/centered_abs_mean": 0.11494539082050323, "signal/frontier_coverage_25/group_std_mean": 0.14966228008270263, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016437190817669034, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016437190817669034, "signal/frontier_coverage_5/centered_abs_mean": 0.1793442577123642, "signal/frontier_coverage_5/group_std_mean": 0.23208971619606017, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025646228808909655, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025646228808909655, "step": 175 }, { "calibration/aurc": 0.32387850371371785, "calibration/batch_distribution_entropy": 0.930447230073459, "calibration/buffer_distribution_entropy": 0.9651309621388965, "calibration/confidence_entropy": 0.4303574408490675, "calibration/coverage@0%": 0.02265625, "calibration/coverage@1%": 0.02265625, "calibration/coverage@10%": 0.08438264432485323, "calibration/coverage@15%": 0.2152420193248532, "calibration/coverage@20%": 0.2941482693248532, "calibration/coverage@25%": 0.3402420193248532, "calibration/coverage@30%": 0.40821459148727984, "calibration/coverage@5%": 0.040234375, "calibration/ece": 0.10406415746330164, "calibration/mean_confidence": 0.5168637963576618, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 452.2, "completions/max_terminated_length": 452.2, "completions/mean_length": 171.79189453125, "completions/mean_terminated_length": 171.90999450683594, "completions/min_length": 18.2, "completions/min_terminated_length": 88.2, "epoch": 0.576, "grad_norm": 0.0009653819724917412, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 604995691.0, "reward": 0.943269693851471, "reward_std": 0.06973677352070809, "rewards/accuracy_reward": 0.515234375, "rewards/brier_reward": 0.7794225335121154, "rewards/confidence_uniqueness_reward": 0.9458268165588379, "rewards/format_reward": 0.99931640625, "rewards/frontier_coverage_0": 0.14076022952795028, "rewards/frontier_coverage_1": 0.14076022952795028, "rewards/frontier_coverage_10": 0.14076022952795028, "rewards/frontier_coverage_15": 0.14076022952795028, "rewards/frontier_coverage_20": 0.14067162126302718, "rewards/frontier_coverage_25": 0.09744109660387039, "rewards/frontier_coverage_5": 0.14076022952795028, "signal/accuracy_reward/centered_abs_mean": 0.080224609375, "signal/accuracy_reward/group_std_mean": 0.11059063673019409, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0401123046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0401123046875, "signal/advantage_abs_mean": 0.0513992503285408, "signal/advantage_pre_scale_abs_mean": 0.0513992503285408, "signal/advantage_pre_scale_std": 0.09461777806282043, "signal/advantage_std": 0.09461777806282043, "signal/brier_reward/centered_abs_mean": 0.13932594060897827, "signal/brier_reward/group_std_mean": 0.17819225192070007, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0139325937256217, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0139325937256217, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026319159567356108, "signal/confidence_uniqueness_reward/group_std_mean": 0.034161582589149475, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002631915872916579, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002631915872916579, "signal/format_reward/centered_abs_mean": 0.001300048828125, "signal/format_reward/group_std_mean": 0.0031943732406944036, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625, "signal/frontier_coverage_0/centered_abs_mean": 0.1736948162317276, "signal/frontier_coverage_0/group_std_mean": 0.22478666603565217, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_1/centered_abs_mean": 0.1736948162317276, "signal/frontier_coverage_1/group_std_mean": 0.22478666603565217, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_10/centered_abs_mean": 0.1736948162317276, "signal/frontier_coverage_10/group_std_mean": 0.22478666603565217, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_15/centered_abs_mean": 0.1736948162317276, "signal/frontier_coverage_15/group_std_mean": 0.22478666603565217, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_20/centered_abs_mean": 0.17225461602210998, "signal/frontier_coverage_20/group_std_mean": 0.22295468747615815, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002463240968063474, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002463240968063474, "signal/frontier_coverage_25/centered_abs_mean": 0.11142444163560868, "signal/frontier_coverage_25/group_std_mean": 0.14512277245521546, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001593369501642883, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001593369501642883, "signal/frontier_coverage_5/centered_abs_mean": 0.1736948162317276, "signal/frontier_coverage_5/group_std_mean": 0.22478666603565217, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002483835769817233, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002483835769817233, "step": 180 }, { "calibration/aurc": 0.2917369911897618, "calibration/batch_distribution_entropy": 0.9291020216354537, "calibration/buffer_distribution_entropy": 0.9625577310801576, "calibration/confidence_entropy": 0.4217634313370632, "calibration/coverage@0%": 0.02306827910958904, "calibration/coverage@1%": 0.02306827910958904, "calibration/coverage@10%": 0.21228825220156552, "calibration/coverage@15%": 0.31037105552837574, "calibration/coverage@20%": 0.4471219116927593, "calibration/coverage@25%": 0.5424604023972603, "calibration/coverage@30%": 0.6170950036692759, "calibration/coverage@5%": 0.06802608243639921, "calibration/ece": 0.13262487285036945, "calibration/mean_confidence": 0.49193563628804593, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 441.0, "completions/max_terminated_length": 441.0, "completions/mean_length": 171.99794921875, "completions/mean_terminated_length": 172.03227844238282, "completions/min_length": 69.0, "completions/min_terminated_length": 83.6, "epoch": 0.592, "grad_norm": 0.0010885463561862707, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 621924662.0, "reward": 0.9475321412086487, "reward_std": 0.07133645117282868, "rewards/accuracy_reward": 0.52119140625, "rewards/brier_reward": 0.7884288311004639, "rewards/confidence_uniqueness_reward": 0.9417649745941162, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.14728878438472748, "rewards/frontier_coverage_1": 0.14728878438472748, "rewards/frontier_coverage_10": 0.14728878438472748, "rewards/frontier_coverage_15": 0.14728878438472748, "rewards/frontier_coverage_20": 0.14530769288539885, "rewards/frontier_coverage_25": 0.09829618036746979, "rewards/frontier_coverage_5": 0.14728878438472748, "signal/accuracy_reward/centered_abs_mean": 0.097454833984375, "signal/accuracy_reward/group_std_mean": 0.12659366130828859, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0487274169921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0487274169921875, "signal/advantage_abs_mean": 0.0545043371617794, "signal/advantage_pre_scale_abs_mean": 0.0545043371617794, "signal/advantage_pre_scale_std": 0.09718780517578125, "signal/advantage_std": 0.09718780517578125, "signal/brier_reward/centered_abs_mean": 0.13488493859767914, "signal/brier_reward/group_std_mean": 0.17273030877113343, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013488493859767914, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013488493859767914, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029794788360595702, "signal/confidence_uniqueness_reward/group_std_mean": 0.03787661641836167, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029794787988066673, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029794787988066673, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.18529422879219054, "signal/frontier_coverage_0/group_std_mean": 0.23799728453159333, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_1/centered_abs_mean": 0.18529422879219054, "signal/frontier_coverage_1/group_std_mean": 0.23799728453159333, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_10/centered_abs_mean": 0.18529422879219054, "signal/frontier_coverage_10/group_std_mean": 0.23799728453159333, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_15/centered_abs_mean": 0.18529422879219054, "signal/frontier_coverage_15/group_std_mean": 0.23799728453159333, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_20/centered_abs_mean": 0.1818026602268219, "signal/frontier_coverage_20/group_std_mean": 0.2335704207420349, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002599778026342392, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002599778026342392, "signal/frontier_coverage_25/centered_abs_mean": 0.11709080636501312, "signal/frontier_coverage_25/group_std_mean": 0.1516294687986374, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016743984539061784, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016743984539061784, "signal/frontier_coverage_5/centered_abs_mean": 0.18529422879219054, "signal/frontier_coverage_5/group_std_mean": 0.23799728453159333, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026497074868530035, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026497074868530035, "step": 185 }, { "calibration/aurc": 0.2309774073824331, "calibration/batch_distribution_entropy": 0.9009881297477383, "calibration/buffer_distribution_entropy": 0.9599412902555885, "calibration/confidence_entropy": 0.3998726480587709, "calibration/coverage@0%": 0.04887867647058823, "calibration/coverage@1%": 0.04887867647058823, "calibration/coverage@10%": 0.2439292279411765, "calibration/coverage@15%": 0.363890931372549, "calibration/coverage@20%": 0.49204044117647056, "calibration/coverage@25%": 0.5959742647058823, "calibration/coverage@30%": 0.6948376225490196, "calibration/coverage@5%": 0.1371813725490196, "calibration/ece": 0.1267814975727107, "calibration/mean_confidence": 0.45705972449120064, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 444.0, "completions/max_terminated_length": 444.0, "completions/mean_length": 172.74423828125, "completions/mean_terminated_length": 172.8115478515625, "completions/min_length": 54.2, "completions/min_terminated_length": 88.0, "epoch": 0.608, "grad_norm": 0.0008090813644230366, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 638693051.0, "reward": 0.9527804374694824, "reward_std": 0.06479336544871331, "rewards/accuracy_reward": 0.52177734375, "rewards/brier_reward": 0.812950336933136, "rewards/confidence_uniqueness_reward": 0.9374483942985534, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.18071360886096954, "rewards/frontier_coverage_1": 0.18071360886096954, "rewards/frontier_coverage_10": 0.18071360886096954, "rewards/frontier_coverage_15": 0.18071360886096954, "rewards/frontier_coverage_20": 0.17192818522453307, "rewards/frontier_coverage_25": 0.12003123611211777, "rewards/frontier_coverage_5": 0.18071360886096954, "signal/accuracy_reward/centered_abs_mean": 0.089129638671875, "signal/accuracy_reward/group_std_mean": 0.11902370899915696, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0445648193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0445648193359375, "signal/advantage_abs_mean": 0.04798509031534195, "signal/advantage_pre_scale_abs_mean": 0.04798509031534195, "signal/advantage_pre_scale_std": 0.08976030051708221, "signal/advantage_std": 0.08976030051708221, "signal/brier_reward/centered_abs_mean": 0.12546005249023437, "signal/brier_reward/group_std_mean": 0.16316278874874116, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01254600528627634, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01254600528627634, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03275583237409592, "signal/confidence_uniqueness_reward/group_std_mean": 0.04150558784604073, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003275583265349269, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003275583265349269, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_coverage_0/centered_abs_mean": 0.18598188161849977, "signal/frontier_coverage_0/group_std_mean": 0.23882973790168763, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_1/centered_abs_mean": 0.18598188161849977, "signal/frontier_coverage_1/group_std_mean": 0.23882973790168763, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_10/centered_abs_mean": 0.18598188161849977, "signal/frontier_coverage_10/group_std_mean": 0.23882973790168763, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_15/centered_abs_mean": 0.18598188161849977, "signal/frontier_coverage_15/group_std_mean": 0.23882973790168763, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_20/centered_abs_mean": 0.1740649312734604, "signal/frontier_coverage_20/group_std_mean": 0.22380025088787078, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024891285225749015, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024891285225749015, "signal/frontier_coverage_25/centered_abs_mean": 0.11340802162885666, "signal/frontier_coverage_25/group_std_mean": 0.14648526012897492, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016217347467318178, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016217347467318178, "signal/frontier_coverage_5/centered_abs_mean": 0.18598188161849977, "signal/frontier_coverage_5/group_std_mean": 0.23882973790168763, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026595407631248235, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026595407631248235, "step": 190 }, { "calibration/aurc": 0.24050447695605665, "calibration/batch_distribution_entropy": 0.940744980652436, "calibration/buffer_distribution_entropy": 0.9571343071662861, "calibration/confidence_entropy": 0.4350306108006845, "calibration/coverage@0%": 0.0171875, "calibration/coverage@1%": 0.0171875, "calibration/coverage@10%": 0.1765625, "calibration/coverage@15%": 0.278515625, "calibration/coverage@20%": 0.48203125, "calibration/coverage@25%": 0.57890625, "calibration/coverage@30%": 0.659375, "calibration/coverage@5%": 0.05078125, "calibration/ece": 0.11090657006311821, "calibration/mean_confidence": 0.5068443759745327, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 404.4, "completions/max_terminated_length": 404.4, "completions/mean_length": 174.5966796875, "completions/mean_terminated_length": 174.6303924560547, "completions/min_length": 67.8, "completions/min_terminated_length": 84.8, "epoch": 0.624, "grad_norm": 0.0013040411286056042, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 655824825.0, "reward": 0.9567921876907348, "reward_std": 0.07369585633277893, "rewards/accuracy_reward": 0.534375, "rewards/brier_reward": 0.8031170845031739, "rewards/confidence_uniqueness_reward": 0.9507421255111694, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.15281389355659486, "rewards/frontier_coverage_1": 0.15281389355659486, "rewards/frontier_coverage_10": 0.15281389355659486, "rewards/frontier_coverage_15": 0.15281389355659486, "rewards/frontier_coverage_20": 0.14267186522483827, "rewards/frontier_coverage_25": 0.09782160967588424, "rewards/frontier_coverage_5": 0.15281389355659486, "signal/accuracy_reward/centered_abs_mean": 0.099169921875, "signal/accuracy_reward/group_std_mean": 0.12814173698425294, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495849609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0495849609375, "signal/advantage_abs_mean": 0.05669146254658699, "signal/advantage_pre_scale_abs_mean": 0.05669146254658699, "signal/advantage_pre_scale_std": 0.10152237415313721, "signal/advantage_std": 0.10152237415313721, "signal/brier_reward/centered_abs_mean": 0.13695546239614487, "signal/brier_reward/group_std_mean": 0.1752742975950241, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013695546798408032, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013695546798408032, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023070438578724862, "signal/confidence_uniqueness_reward/group_std_mean": 0.02995435558259487, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002307043923065066, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002307043923065066, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.18192780315876006, "signal/frontier_coverage_0/group_std_mean": 0.23622408807277678, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_1/centered_abs_mean": 0.18192780315876006, "signal/frontier_coverage_1/group_std_mean": 0.23622408807277678, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_10/centered_abs_mean": 0.18192780315876006, "signal/frontier_coverage_10/group_std_mean": 0.23622408807277678, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_15/centered_abs_mean": 0.18192780315876006, "signal/frontier_coverage_15/group_std_mean": 0.23622408807277678, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_20/centered_abs_mean": 0.16533594131469725, "signal/frontier_coverage_20/group_std_mean": 0.2150631368160248, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00236430405639112, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00236430405639112, "signal/frontier_coverage_25/centered_abs_mean": 0.10322353839874268, "signal/frontier_coverage_25/group_std_mean": 0.1351427912712097, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014760966412723064, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014760966412723064, "signal/frontier_coverage_5/centered_abs_mean": 0.18192780315876006, "signal/frontier_coverage_5/group_std_mean": 0.23622408807277678, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002601567655801773, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002601567655801773, "step": 195 }, { "calibration/aurc": 0.2662952529255084, "calibration/batch_distribution_entropy": 0.9377166997975432, "calibration/buffer_distribution_entropy": 0.9553364115728316, "calibration/confidence_entropy": 0.441204286731301, "calibration/coverage@0%": 0.04414138943248532, "calibration/coverage@1%": 0.09422547700587083, "calibration/coverage@10%": 0.3096624266144814, "calibration/coverage@15%": 0.34836029231898236, "calibration/coverage@20%": 0.4347167013209393, "calibration/coverage@25%": 0.515987340998043, "calibration/coverage@30%": 0.60390625, "calibration/coverage@5%": 0.19084056996086105, "calibration/ece": 0.16710317600982427, "calibration/mean_confidence": 0.5716693336470889, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 448.2, "completions/max_terminated_length": 448.2, "completions/mean_length": 179.13974609375, "completions/mean_terminated_length": 179.2987823486328, "completions/min_length": 56.0, "completions/min_terminated_length": 91.0, "epoch": 0.64, "grad_norm": 0.0009510635281912982, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 673001904.0, "reward": 0.968864917755127, "reward_std": 0.06678919866681099, "rewards/accuracy_reward": 0.5662109375, "rewards/brier_reward": 0.7999752283096313, "rewards/confidence_uniqueness_reward": 0.9497535705566407, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.12023089975118637, "rewards/frontier_coverage_1": 0.12023089975118637, "rewards/frontier_coverage_10": 0.12023089975118637, "rewards/frontier_coverage_15": 0.12023089975118637, "rewards/frontier_coverage_20": 0.10779803842306138, "rewards/frontier_coverage_25": 0.07608477771282196, "rewards/frontier_coverage_5": 0.12023089975118637, "signal/accuracy_reward/centered_abs_mean": 0.0839599609375, "signal/accuracy_reward/group_std_mean": 0.1103439062833786, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04197998046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04197998046875, "signal/advantage_abs_mean": 0.05087776109576225, "signal/advantage_pre_scale_abs_mean": 0.05087776109576225, "signal/advantage_pre_scale_std": 0.09735682904720307, "signal/advantage_std": 0.09735682904720307, "signal/brier_reward/centered_abs_mean": 0.12388549447059631, "signal/brier_reward/group_std_mean": 0.15970089435577392, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012388549372553825, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012388549372553825, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02408561371266842, "signal/confidence_uniqueness_reward/group_std_mean": 0.030729348585009576, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024085613898932935, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024085613898932935, "signal/format_reward/centered_abs_mean": 0.001446533203125, "signal/format_reward/group_std_mean": 0.0024173962883651257, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007232666015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007232666015625, "signal/frontier_coverage_0/centered_abs_mean": 0.1510331243276596, "signal/frontier_coverage_0/group_std_mean": 0.1984680712223053, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_1/centered_abs_mean": 0.1510331243276596, "signal/frontier_coverage_1/group_std_mean": 0.1984680712223053, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_10/centered_abs_mean": 0.1510331243276596, "signal/frontier_coverage_10/group_std_mean": 0.1984680712223053, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_15/centered_abs_mean": 0.1510331243276596, "signal/frontier_coverage_15/group_std_mean": 0.1984680712223053, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_20/centered_abs_mean": 0.13272771388292312, "signal/frontier_coverage_20/group_std_mean": 0.17479420006275176, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018980062566697597, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018980062566697597, "signal/frontier_coverage_25/centered_abs_mean": 0.08362834304571151, "signal/frontier_coverage_25/group_std_mean": 0.11021229475736619, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011958853341639043, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011958853341639043, "signal/frontier_coverage_5/centered_abs_mean": 0.1510331243276596, "signal/frontier_coverage_5/group_std_mean": 0.1984680712223053, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002159773651510477, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002159773651510477, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.46054927486329866, "eval_calibration/batch_distribution_entropy": 0.8455805403689154, "eval_calibration/buffer_distribution_entropy": 0.9548862717030364, "eval_calibration/confidence_entropy": 0.38920046266411673, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.140625, "eval_calibration/coverage@25%": 0.28125, "eval_calibration/coverage@30%": 0.296875, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.21491073393227433, "eval_calibration/mean_confidence": 0.48616073393227427, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 359.0, "eval_completions/max_terminated_length": 359.0, "eval_completions/mean_length": 182.6583366394043, "eval_completions/mean_terminated_length": 182.6583366394043, "eval_completions/min_length": 94.25, "eval_completions/min_terminated_length": 94.25, "eval_loss": 0.0, "eval_num_tokens": 673001904.0, "eval_reward": 0.902785137295723, "eval_reward_std": 0.2266659364104271, "eval_rewards/accuracy_reward": 0.431640625, "eval_rewards/brier_reward": 0.7861361354589462, "eval_rewards/confidence_uniqueness_reward": 0.893310546875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.2071906253695488, "eval_rewards/frontier_coverage_1": 0.2071906253695488, "eval_rewards/frontier_coverage_10": 0.2071906253695488, "eval_rewards/frontier_coverage_15": 0.2071906253695488, "eval_rewards/frontier_coverage_20": 0.18089748173952103, "eval_rewards/frontier_coverage_25": 0.11322920396924019, "eval_rewards/frontier_coverage_5": 0.2071906253695488, "eval_runtime": 18.9985, "eval_samples_per_second": 26.318, "eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375, "eval_signal/accuracy_reward/group_std_mean": 0.49339816719293594, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875, "eval_signal/advantage_abs_mean": 0.21008000895380974, "eval_signal/advantage_pre_scale_abs_mean": 0.21008000895380974, "eval_signal/advantage_pre_scale_std": 0.22413352876901627, "eval_signal/advantage_std": 0.22413352876901627, "eval_signal/brier_reward/centered_abs_mean": 0.2254427894949913, "eval_signal/brier_reward/group_std_mean": 0.2808932363986969, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022544278763234615, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022544278763234615, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.042633056640625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05089193116873503, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004263305920176208, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004263305920176208, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.37774093449115753, "eval_signal/frontier_coverage_0/group_std_mean": 0.46647125482559204, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.37774093449115753, "eval_signal/frontier_coverage_1/group_std_mean": 0.46647125482559204, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.37774093449115753, "eval_signal/frontier_coverage_10/group_std_mean": 0.46647125482559204, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.37774093449115753, "eval_signal/frontier_coverage_15/group_std_mean": 0.46647125482559204, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3283703997731209, "eval_signal/frontier_coverage_20/group_std_mean": 0.40697097033262253, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00469569640699774, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00469569640699774, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.19348600879311562, "eval_signal/frontier_coverage_25/group_std_mean": 0.24605557322502136, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027668499387800694, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027668499387800694, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.37774093449115753, "eval_signal/frontier_coverage_5/group_std_mean": 0.46647125482559204, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005401695379987359, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005401695379987359, "eval_steps_per_second": 0.211, "step": 200 }, { "calibration/aurc": 0.4363457773568281, "calibration/batch_distribution_entropy": 0.9584845881514225, "calibration/buffer_distribution_entropy": 0.954657899533002, "calibration/confidence_entropy": 0.45774466342454057, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.00078125, "calibration/coverage@15%": 0.00078125, "calibration/coverage@20%": 0.00078125, "calibration/coverage@25%": 0.0390625, "calibration/coverage@30%": 0.19866071428571427, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.15967113039820166, "calibration/mean_confidence": 0.5140276042389598, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.8, "completions/max_terminated_length": 408.8, "completions/mean_length": 181.09658203125, "completions/mean_terminated_length": 181.09658203125, "completions/min_length": 81.8, "completions/min_terminated_length": 81.8, "epoch": 0.656, "grad_norm": 0.0008898309315554798, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 689712877.0, "reward": 0.9377357721328735, "reward_std": 0.07557832449674606, "rewards/accuracy_reward": 0.50634765625, "rewards/brier_reward": 0.772177231311798, "rewards/confidence_uniqueness_reward": 0.9539490699768066, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.1302212730050087, "rewards/frontier_coverage_1": 0.1302212730050087, "rewards/frontier_coverage_10": 0.1302212730050087, "rewards/frontier_coverage_15": 0.1302212730050087, "rewards/frontier_coverage_20": 0.11557191163301468, "rewards/frontier_coverage_25": 0.0757653221487999, "rewards/frontier_coverage_5": 0.1302212730050087, "signal/accuracy_reward/centered_abs_mean": 0.095648193359375, "signal/accuracy_reward/group_std_mean": 0.12803646177053452, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0478240966796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0478240966796875, "signal/advantage_abs_mean": 0.05762340724468231, "signal/advantage_pre_scale_abs_mean": 0.05762340724468231, "signal/advantage_pre_scale_std": 0.10331527590751648, "signal/advantage_std": 0.10331527590751648, "signal/brier_reward/centered_abs_mean": 0.14113092422485352, "signal/brier_reward/group_std_mean": 0.180476638674736, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014113092422485351, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014113092422485351, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02103535197675228, "signal/confidence_uniqueness_reward/group_std_mean": 0.027137762680649758, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002103535202331841, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002103535202331841, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.172061425447464, "signal/frontier_coverage_0/group_std_mean": 0.22395667135715486, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_1/centered_abs_mean": 0.172061425447464, "signal/frontier_coverage_1/group_std_mean": 0.22395667135715486, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_10/centered_abs_mean": 0.172061425447464, "signal/frontier_coverage_10/group_std_mean": 0.22395667135715486, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_15/centered_abs_mean": 0.172061425447464, "signal/frontier_coverage_15/group_std_mean": 0.22395667135715486, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_20/centered_abs_mean": 0.1527923047542572, "signal/frontier_coverage_20/group_std_mean": 0.19921277165412904, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021849300246685743, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021849300246685743, "signal/frontier_coverage_25/centered_abs_mean": 0.09410947412252427, "signal/frontier_coverage_25/group_std_mean": 0.12340447306632996, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013457655208185316, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013457655208185316, "signal/frontier_coverage_5/centered_abs_mean": 0.172061425447464, "signal/frontier_coverage_5/group_std_mean": 0.22395667135715486, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024604784324765206, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024604784324765206, "step": 205 }, { "calibration/aurc": 0.29616188101698054, "calibration/batch_distribution_entropy": 0.9400251706705747, "calibration/buffer_distribution_entropy": 0.9554609832485716, "calibration/confidence_entropy": 0.43868229938830494, "calibration/coverage@0%": 0.00859375, "calibration/coverage@1%": 0.00859375, "calibration/coverage@10%": 0.088671875, "calibration/coverage@15%": 0.175, "calibration/coverage@20%": 0.25703125, "calibration/coverage@25%": 0.373046875, "calibration/coverage@30%": 0.45546875, "calibration/coverage@5%": 0.069921875, "calibration/ece": 0.1463105905099502, "calibration/mean_confidence": 0.5178573725467299, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 481.4, "completions/max_terminated_length": 481.4, "completions/mean_length": 181.1900390625, "completions/mean_terminated_length": 181.22657470703126, "completions/min_length": 69.4, "completions/min_terminated_length": 87.8, "epoch": 0.672, "grad_norm": 0.0008848632569424808, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 706481703.0, "reward": 0.9508843898773194, "reward_std": 0.06910986453294754, "rewards/accuracy_reward": 0.52646484375, "rewards/brier_reward": 0.7914854645729065, "rewards/confidence_uniqueness_reward": 0.9468018293380738, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.14879233986139298, "rewards/frontier_coverage_1": 0.14879233986139298, "rewards/frontier_coverage_10": 0.14879233986139298, "rewards/frontier_coverage_15": 0.14879233986139298, "rewards/frontier_coverage_20": 0.13711362332105637, "rewards/frontier_coverage_25": 0.0924137145280838, "rewards/frontier_coverage_5": 0.14879233986139298, "signal/accuracy_reward/centered_abs_mean": 0.099713134765625, "signal/accuracy_reward/group_std_mean": 0.12950885742902757, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0498565673828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0498565673828125, "signal/advantage_abs_mean": 0.052921504527330396, "signal/advantage_pre_scale_abs_mean": 0.052921504527330396, "signal/advantage_pre_scale_std": 0.09709036946296692, "signal/advantage_std": 0.09709036946296692, "signal/brier_reward/centered_abs_mean": 0.1296452760696411, "signal/brier_reward/group_std_mean": 0.16551610827445984, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012964527495205403, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012964527495205403, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0252695769071579, "signal/confidence_uniqueness_reward/group_std_mean": 0.03185669630765915, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002526957681402564, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002526957681402564, "signal/format_reward/centered_abs_mean": 0.0003662109375, "signal/format_reward/group_std_mean": 0.000768545875325799, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00018310546875, "signal/frontier_coverage_0/centered_abs_mean": 0.1746266096830368, "signal/frontier_coverage_0/group_std_mean": 0.22620674967765808, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_1/centered_abs_mean": 0.1746266096830368, "signal/frontier_coverage_1/group_std_mean": 0.22620674967765808, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_10/centered_abs_mean": 0.1746266096830368, "signal/frontier_coverage_10/group_std_mean": 0.22620674967765808, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_15/centered_abs_mean": 0.1746266096830368, "signal/frontier_coverage_15/group_std_mean": 0.22620674967765808, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_20/centered_abs_mean": 0.15180024206638337, "signal/frontier_coverage_20/group_std_mean": 0.19782575070858002, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021707434207201004, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021707434207201004, "signal/frontier_coverage_25/centered_abs_mean": 0.09401053637266159, "signal/frontier_coverage_25/group_std_mean": 0.12310894876718521, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00134435067884624, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00134435067884624, "signal/frontier_coverage_5/centered_abs_mean": 0.1746266096830368, "signal/frontier_coverage_5/group_std_mean": 0.22620674967765808, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024971604347229005, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024971604347229005, "step": 210 }, { "calibration/aurc": 0.3350253442444463, "calibration/batch_distribution_entropy": 0.9553660352834303, "calibration/buffer_distribution_entropy": 0.9556529548688049, "calibration/confidence_entropy": 0.45873064609980724, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.14375, "calibration/coverage@15%": 0.23359375, "calibration/coverage@20%": 0.33203125, "calibration/coverage@25%": 0.490234375, "calibration/coverage@30%": 0.576953125, "calibration/coverage@5%": 0.034375, "calibration/ece": 0.14170870449303774, "calibration/mean_confidence": 0.4800435907009698, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 439.2, "completions/max_terminated_length": 439.2, "completions/mean_length": 185.12431640625, "completions/mean_terminated_length": 185.16006469726562, "completions/min_length": 52.6, "completions/min_terminated_length": 89.6, "epoch": 0.688, "grad_norm": 0.0012188840191811323, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 723331296.0, "reward": 0.9551563620567322, "reward_std": 0.06885315775871277, "rewards/accuracy_reward": 0.5369140625, "rewards/brier_reward": 0.7924714326858521, "rewards/confidence_uniqueness_reward": 0.9522701382637024, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.13426284790039061, "rewards/frontier_coverage_1": 0.13426284790039061, "rewards/frontier_coverage_10": 0.13426284790039061, "rewards/frontier_coverage_15": 0.13426284790039061, "rewards/frontier_coverage_20": 0.11316078677773475, "rewards/frontier_coverage_25": 0.07726155370473861, "rewards/frontier_coverage_5": 0.13426284790039061, "signal/accuracy_reward/centered_abs_mean": 0.0993408203125, "signal/accuracy_reward/group_std_mean": 0.13218309581279755, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04967041015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04967041015625, "signal/advantage_abs_mean": 0.05133445784449577, "signal/advantage_pre_scale_abs_mean": 0.05133445784449577, "signal/advantage_pre_scale_std": 0.09505542218685151, "signal/advantage_std": 0.09505542218685151, "signal/brier_reward/centered_abs_mean": 0.1251007065176964, "signal/brier_reward/group_std_mean": 0.16029010117053985, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012510071508586407, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012510071508586407, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020702214539051057, "signal/confidence_uniqueness_reward/group_std_mean": 0.026370349898934366, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00207022144459188, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00207022144459188, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.17874427139759064, "signal/frontier_coverage_0/group_std_mean": 0.22735729515552522, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_1/centered_abs_mean": 0.17874427139759064, "signal/frontier_coverage_1/group_std_mean": 0.22735729515552522, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_10/centered_abs_mean": 0.17874427139759064, "signal/frontier_coverage_10/group_std_mean": 0.22735729515552522, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_15/centered_abs_mean": 0.17874427139759064, "signal/frontier_coverage_15/group_std_mean": 0.22735729515552522, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_20/centered_abs_mean": 0.1514152020215988, "signal/frontier_coverage_20/group_std_mean": 0.19314254224300384, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021652374416589737, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021652374416589737, "signal/frontier_coverage_25/centered_abs_mean": 0.09458372592926026, "signal/frontier_coverage_25/group_std_mean": 0.12103196978569031, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013525472953915597, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013525472953915597, "signal/frontier_coverage_5/centered_abs_mean": 0.17874427139759064, "signal/frontier_coverage_5/group_std_mean": 0.22735729515552522, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002556043164804578, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002556043164804578, "step": 215 }, { "calibration/aurc": 0.2551421501062629, "calibration/batch_distribution_entropy": 0.9429821506693088, "calibration/buffer_distribution_entropy": 0.9568870103860352, "calibration/confidence_entropy": 0.43942119152318354, "calibration/coverage@0%": 0.019921875, "calibration/coverage@1%": 0.019921875, "calibration/coverage@10%": 0.0671875, "calibration/coverage@15%": 0.20703125, "calibration/coverage@20%": 0.42109375, "calibration/coverage@25%": 0.541015625, "calibration/coverage@30%": 0.638671875, "calibration/coverage@5%": 0.02734375, "calibration/ece": 0.10697865923649652, "calibration/mean_confidence": 0.5212105144066516, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 484.6, "completions/max_terminated_length": 484.6, "completions/mean_length": 184.15478515625, "completions/mean_terminated_length": 184.2274963378906, "completions/min_length": 35.0, "completions/min_terminated_length": 91.6, "epoch": 0.704, "grad_norm": 0.0008549767080694437, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 740083185.0, "reward": 0.9608247399330139, "reward_std": 0.06562578678131104, "rewards/accuracy_reward": 0.54541015625, "rewards/brier_reward": 0.8056188941001892, "rewards/confidence_uniqueness_reward": 0.9541590809822083, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.13397103250026704, "rewards/frontier_coverage_1": 0.13397103250026704, "rewards/frontier_coverage_10": 0.13397103250026704, "rewards/frontier_coverage_15": 0.13397103250026704, "rewards/frontier_coverage_20": 0.11571932882070542, "rewards/frontier_coverage_25": 0.08058026283979416, "rewards/frontier_coverage_5": 0.13397103250026704, "signal/accuracy_reward/centered_abs_mean": 0.083770751953125, "signal/accuracy_reward/group_std_mean": 0.11368496417999267, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0418853759765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0418853759765625, "signal/advantage_abs_mean": 0.04889403432607651, "signal/advantage_pre_scale_abs_mean": 0.04889403432607651, "signal/advantage_pre_scale_std": 0.09222806245088577, "signal/advantage_std": 0.09222806245088577, "signal/brier_reward/centered_abs_mean": 0.12414066046476364, "signal/brier_reward/group_std_mean": 0.15926886796951295, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01241406574845314, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01241406574845314, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020242217183113097, "signal/confidence_uniqueness_reward/group_std_mean": 0.027059277519583702, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0020242216996848583, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020242216996848583, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_std_mean": 0.0024258273653686045, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_coverage_0/centered_abs_mean": 0.16520517766475679, "signal/frontier_coverage_0/group_std_mean": 0.21402345597743988, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_1/centered_abs_mean": 0.16520517766475679, "signal/frontier_coverage_1/group_std_mean": 0.21402345597743988, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_10/centered_abs_mean": 0.16520517766475679, "signal/frontier_coverage_10/group_std_mean": 0.21402345597743988, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_15/centered_abs_mean": 0.16520517766475679, "signal/frontier_coverage_15/group_std_mean": 0.21402345597743988, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_20/centered_abs_mean": 0.13321104198694228, "signal/frontier_coverage_20/group_std_mean": 0.1731552869081497, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019049178808927536, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019049178808927536, "signal/frontier_coverage_25/centered_abs_mean": 0.08578807860612869, "signal/frontier_coverage_25/group_std_mean": 0.11171852350234986, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012267695274204016, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012267695274204016, "signal/frontier_coverage_5/centered_abs_mean": 0.16520517766475679, "signal/frontier_coverage_5/group_std_mean": 0.21402345597743988, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002362434100359678, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002362434100359678, "step": 220 }, { "calibration/aurc": 0.2324519263969714, "calibration/batch_distribution_entropy": 0.9606665053848438, "calibration/buffer_distribution_entropy": 0.9569371832498403, "calibration/confidence_entropy": 0.4423637270520454, "calibration/coverage@0%": 0.056640625, "calibration/coverage@1%": 0.069921875, "calibration/coverage@10%": 0.2640625, "calibration/coverage@15%": 0.34140625, "calibration/coverage@20%": 0.516015625, "calibration/coverage@25%": 0.603515625, "calibration/coverage@30%": 0.677734375, "calibration/coverage@5%": 0.15, "calibration/ece": 0.14048664549176487, "calibration/mean_confidence": 0.5234643985083343, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.4, "completions/max_terminated_length": 452.4, "completions/mean_length": 185.41982421875, "completions/mean_terminated_length": 185.41982421875, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.72, "grad_norm": 0.0010070810094475746, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 756991740.0, "reward": 0.9701446890830994, "reward_std": 0.06546642929315567, "rewards/accuracy_reward": 0.56376953125, "rewards/brier_reward": 0.8114647984504699, "rewards/confidence_uniqueness_reward": 0.9570422768592834, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.1252898707985878, "rewards/frontier_coverage_1": 0.1252898707985878, "rewards/frontier_coverage_10": 0.1252898707985878, "rewards/frontier_coverage_15": 0.1252898707985878, "rewards/frontier_coverage_20": 0.10252745747566223, "rewards/frontier_coverage_25": 0.07228612750768662, "rewards/frontier_coverage_5": 0.1252898707985878, "signal/accuracy_reward/centered_abs_mean": 0.085198974609375, "signal/accuracy_reward/group_std_mean": 0.12007367312908172, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0425994873046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0425994873046875, "signal/advantage_abs_mean": 0.04709207341074943, "signal/advantage_pre_scale_abs_mean": 0.04709207341074943, "signal/advantage_pre_scale_std": 0.09191109389066696, "signal/advantage_std": 0.09191109389066696, "signal/brier_reward/centered_abs_mean": 0.11166439652442932, "signal/brier_reward/group_std_mean": 0.14501525461673737, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011166440136730672, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011166440136730672, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017501908540725707, "signal/confidence_uniqueness_reward/group_std_mean": 0.022045810893177985, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017501908587291837, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017501908587291837, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15232807993888856, "signal/frontier_coverage_0/group_std_mean": 0.19803299903869628, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_1/centered_abs_mean": 0.15232807993888856, "signal/frontier_coverage_1/group_std_mean": 0.19803299903869628, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_10/centered_abs_mean": 0.15232807993888856, "signal/frontier_coverage_10/group_std_mean": 0.19803299903869628, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_15/centered_abs_mean": 0.15232807993888856, "signal/frontier_coverage_15/group_std_mean": 0.19803299903869628, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_20/centered_abs_mean": 0.11411124169826507, "signal/frontier_coverage_20/group_std_mean": 0.1488576978445053, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016317907487973572, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016317907487973572, "signal/frontier_coverage_25/centered_abs_mean": 0.07262995690107346, "signal/frontier_coverage_25/group_std_mean": 0.09404050707817077, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010386083857156336, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010386083857156336, "signal/frontier_coverage_5/centered_abs_mean": 0.15232807993888856, "signal/frontier_coverage_5/group_std_mean": 0.19803299903869628, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021782914409413934, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021782914409413934, "step": 225 }, { "calibration/aurc": 0.2439529951333029, "calibration/batch_distribution_entropy": 0.9487447159000004, "calibration/buffer_distribution_entropy": 0.9573477855546649, "calibration/confidence_entropy": 0.44479426104559405, "calibration/coverage@0%": 0.006642918297455969, "calibration/coverage@1%": 0.006642918297455969, "calibration/coverage@10%": 0.14687729329745597, "calibration/coverage@15%": 0.36093979329745596, "calibration/coverage@20%": 0.47461166829745594, "calibration/coverage@25%": 0.5570771159491195, "calibration/coverage@30%": 0.6911218811154598, "calibration/coverage@5%": 0.08086166829745597, "calibration/ece": 0.13735173236558448, "calibration/mean_confidence": 0.5550578967354733, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 566.0, "completions/max_terminated_length": 566.0, "completions/mean_length": 187.6255859375, "completions/mean_terminated_length": 187.6255859375, "completions/min_length": 88.8, "completions/min_terminated_length": 88.8, "epoch": 0.736, "grad_norm": 0.0006303332047536969, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 773852610.0, "reward": 0.9724397420883178, "reward_std": 0.061435190588235856, "rewards/accuracy_reward": 0.57197265625, "rewards/brier_reward": 0.8023635387420655, "rewards/confidence_uniqueness_reward": 0.9555821895599366, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.11602756455540657, "rewards/frontier_coverage_1": 0.11602756455540657, "rewards/frontier_coverage_10": 0.11602756455540657, "rewards/frontier_coverage_15": 0.11602756455540657, "rewards/frontier_coverage_20": 0.09671139717102051, "rewards/frontier_coverage_25": 0.07193926870822906, "rewards/frontier_coverage_5": 0.11602756455540657, "signal/accuracy_reward/centered_abs_mean": 0.078399658203125, "signal/accuracy_reward/group_std_mean": 0.10787871330976487, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0391998291015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0391998291015625, "signal/advantage_abs_mean": 0.04567759558558464, "signal/advantage_pre_scale_abs_mean": 0.04567759558558464, "signal/advantage_pre_scale_std": 0.08717550188302994, "signal/advantage_std": 0.08717550188302994, "signal/brier_reward/centered_abs_mean": 0.11523260176181793, "signal/brier_reward/group_std_mean": 0.14933998584747316, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011523259989917278, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011523259989917278, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018767333030700682, "signal/confidence_uniqueness_reward/group_std_mean": 0.02405969724059105, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0018767332891002297, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018767332891002297, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15312386751174928, "signal/frontier_coverage_0/group_std_mean": 0.19944342970848083, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_1/centered_abs_mean": 0.15312386751174928, "signal/frontier_coverage_1/group_std_mean": 0.19944342970848083, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_10/centered_abs_mean": 0.15312386751174928, "signal/frontier_coverage_10/group_std_mean": 0.19944342970848083, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_15/centered_abs_mean": 0.15312386751174928, "signal/frontier_coverage_15/group_std_mean": 0.19944342970848083, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_20/centered_abs_mean": 0.11110765635967254, "signal/frontier_coverage_20/group_std_mean": 0.1452748954296112, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015888395719230175, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015888395719230175, "signal/frontier_coverage_25/centered_abs_mean": 0.07131384164094925, "signal/frontier_coverage_25/group_std_mean": 0.09314024895429611, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010197879397310316, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010197879397310316, "signal/frontier_coverage_5/centered_abs_mean": 0.15312386751174928, "signal/frontier_coverage_5/group_std_mean": 0.19944342970848083, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002189671341329813, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002189671341329813, "step": 230 }, { "calibration/aurc": 0.2604849656231073, "calibration/batch_distribution_entropy": 0.9362567886264237, "calibration/buffer_distribution_entropy": 0.9574063927247802, "calibration/confidence_entropy": 0.4310544919287905, "calibration/coverage@0%": 0.017578125, "calibration/coverage@1%": 0.017578125, "calibration/coverage@10%": 0.165625, "calibration/coverage@15%": 0.347265625, "calibration/coverage@20%": 0.441015625, "calibration/coverage@25%": 0.523046875, "calibration/coverage@30%": 0.62578125, "calibration/coverage@5%": 0.0625, "calibration/ece": 0.1265339089932299, "calibration/mean_confidence": 0.4604680332885215, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 477.4, "completions/max_terminated_length": 477.4, "completions/mean_length": 189.84384765625, "completions/mean_terminated_length": 189.86265258789064, "completions/min_length": 72.4, "completions/min_terminated_length": 92.2, "epoch": 0.752, "grad_norm": 0.000941340927965939, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 791023811.0, "reward": 0.9691111087799072, "reward_std": 0.06380453407764435, "rewards/accuracy_reward": 0.5640625, "rewards/brier_reward": 0.8048084497451782, "rewards/confidence_uniqueness_reward": 0.9539137840270996, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.12513699382543564, "rewards/frontier_coverage_1": 0.12513699382543564, "rewards/frontier_coverage_10": 0.12513699382543564, "rewards/frontier_coverage_15": 0.12513699382543564, "rewards/frontier_coverage_20": 0.09549697563052177, "rewards/frontier_coverage_25": 0.06598303094506264, "rewards/frontier_coverage_5": 0.12513699382543564, "signal/accuracy_reward/centered_abs_mean": 0.08544921875, "signal/accuracy_reward/group_std_mean": 0.1140655368566513, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042724609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042724609375, "signal/advantage_abs_mean": 0.04823839291930199, "signal/advantage_pre_scale_abs_mean": 0.04823839291930199, "signal/advantage_pre_scale_std": 0.093255215883255, "signal/advantage_std": 0.093255215883255, "signal/brier_reward/centered_abs_mean": 0.10841264575719833, "signal/brier_reward/group_std_mean": 0.14089754223823547, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010841264761984349, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010841264761984349, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019311527907848357, "signal/confidence_uniqueness_reward/group_std_mean": 0.02457045093178749, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0019311528420075774, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019311528420075774, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14460354149341584, "signal/frontier_coverage_0/group_std_mean": 0.18843676149845123, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_1/centered_abs_mean": 0.14460354149341584, "signal/frontier_coverage_1/group_std_mean": 0.18843676149845123, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_10/centered_abs_mean": 0.14460354149341584, "signal/frontier_coverage_10/group_std_mean": 0.18843676149845123, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_15/centered_abs_mean": 0.14460354149341584, "signal/frontier_coverage_15/group_std_mean": 0.18843676149845123, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_20/centered_abs_mean": 0.1025826632976532, "signal/frontier_coverage_20/group_std_mean": 0.13485993444919586, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014669320778921246, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014669320778921246, "signal/frontier_coverage_25/centered_abs_mean": 0.0643330879509449, "signal/frontier_coverage_25/group_std_mean": 0.08459075540304184, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009199631633237005, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009199631633237005, "signal/frontier_coverage_5/centered_abs_mean": 0.14460354149341584, "signal/frontier_coverage_5/group_std_mean": 0.18843676149845123, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020678306696936487, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020678306696936487, "step": 235 }, { "calibration/aurc": 0.2686377140883339, "calibration/batch_distribution_entropy": 0.9594533632566217, "calibration/buffer_distribution_entropy": 0.9578625978207885, "calibration/confidence_entropy": 0.4627788377016112, "calibration/coverage@0%": 0.056640625, "calibration/coverage@1%": 0.0703125, "calibration/coverage@10%": 0.269140625, "calibration/coverage@15%": 0.32890625, "calibration/coverage@20%": 0.41996162548923677, "calibration/coverage@25%": 0.47818539016634054, "calibration/coverage@30%": 0.5680505442759295, "calibration/coverage@5%": 0.17578125, "calibration/ece": 0.18309338930519575, "calibration/mean_confidence": 0.4839827718853355, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 194.88076171875, "completions/mean_terminated_length": 194.92012939453124, "completions/min_length": 74.0, "completions/min_terminated_length": 93.2, "epoch": 0.768, "grad_norm": 0.0009122573537752032, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 807952094.0, "reward": 0.9469700813293457, "reward_std": 0.06870835795998573, "rewards/accuracy_reward": 0.51123046875, "rewards/brier_reward": 0.8135611772537231, "rewards/confidence_uniqueness_reward": 0.9532927393913269, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.16543679535388947, "rewards/frontier_coverage_1": 0.16543679535388947, "rewards/frontier_coverage_10": 0.16543679535388947, "rewards/frontier_coverage_15": 0.16529496312141417, "rewards/frontier_coverage_20": 0.12598445862531663, "rewards/frontier_coverage_25": 0.08305399417877198, "rewards/frontier_coverage_5": 0.16543679535388947, "signal/accuracy_reward/centered_abs_mean": 0.091339111328125, "signal/accuracy_reward/group_std_mean": 0.11980533748865127, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0456695556640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0456695556640625, "signal/advantage_abs_mean": 0.052151027321815493, "signal/advantage_pre_scale_abs_mean": 0.052151027321815493, "signal/advantage_pre_scale_std": 0.0985235944390297, "signal/advantage_std": 0.0985235944390297, "signal/brier_reward/centered_abs_mean": 0.11318521946668625, "signal/brier_reward/group_std_mean": 0.14390725791454315, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011318522319197655, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011318522319197655, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02046610079705715, "signal/confidence_uniqueness_reward/group_std_mean": 0.026208149269223213, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0020466101123020053, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020466101123020053, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.1562270164489746, "signal/frontier_coverage_0/group_std_mean": 0.20009250938892365, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022340463940054177, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022340463940054177, "signal/frontier_coverage_1/centered_abs_mean": 0.1562270164489746, "signal/frontier_coverage_1/group_std_mean": 0.20009250938892365, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022340463940054177, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022340463940054177, "signal/frontier_coverage_10/centered_abs_mean": 0.1562270164489746, "signal/frontier_coverage_10/group_std_mean": 0.20009250938892365, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022340463940054177, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022340463940054177, "signal/frontier_coverage_15/centered_abs_mean": 0.15608695149421692, "signal/frontier_coverage_15/group_std_mean": 0.19990470111370087, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022320433985441924, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022320433985441924, "signal/frontier_coverage_20/centered_abs_mean": 0.11054950356483459, "signal/frontier_coverage_20/group_std_mean": 0.14203203320503235, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015808578580617904, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015808578580617904, "signal/frontier_coverage_25/centered_abs_mean": 0.06979388296604157, "signal/frontier_coverage_25/group_std_mean": 0.08927626758813859, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009980525122955442, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009980525122955442, "signal/frontier_coverage_5/centered_abs_mean": 0.1562270164489746, "signal/frontier_coverage_5/group_std_mean": 0.20009250938892365, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022340463940054177, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022340463940054177, "step": 240 }, { "calibration/aurc": 0.3197772747706285, "calibration/batch_distribution_entropy": 0.9217777615802009, "calibration/buffer_distribution_entropy": 0.9572320168834499, "calibration/confidence_entropy": 0.4192123665672717, "calibration/coverage@0%": 0.049236331947162426, "calibration/coverage@1%": 0.051189456947162426, "calibration/coverage@10%": 0.20206778987279844, "calibration/coverage@15%": 0.2517153864970646, "calibration/coverage@20%": 0.2955112524461839, "calibration/coverage@25%": 0.3643162915851272, "calibration/coverage@30%": 0.4334989603718199, "calibration/coverage@5%": 0.14654170743639922, "calibration/ece": 0.16162157978207495, "calibration/mean_confidence": 0.5331659593344009, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 517.2, "completions/max_terminated_length": 517.2, "completions/mean_length": 194.9912109375, "completions/mean_terminated_length": 195.0108184814453, "completions/min_length": 77.4, "completions/min_terminated_length": 96.2, "epoch": 0.784, "grad_norm": 0.0007836997392587364, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 825123172.0, "reward": 0.9639974594116211, "reward_std": 0.06840100809931755, "rewards/accuracy_reward": 0.56015625, "rewards/brier_reward": 0.7868908882141114, "rewards/confidence_uniqueness_reward": 0.9535213708877563, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.11056532710790634, "rewards/frontier_coverage_1": 0.11056532710790634, "rewards/frontier_coverage_10": 0.11056532710790634, "rewards/frontier_coverage_15": 0.1084191419184208, "rewards/frontier_coverage_20": 0.08252720981836319, "rewards/frontier_coverage_25": 0.06098323464393616, "rewards/frontier_coverage_5": 0.11056532710790634, "signal/accuracy_reward/centered_abs_mean": 0.0933349609375, "signal/accuracy_reward/group_std_mean": 0.1282936602830887, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04666748046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04666748046875, "signal/advantage_abs_mean": 0.05056538209319115, "signal/advantage_pre_scale_abs_mean": 0.05056538209319115, "signal/advantage_pre_scale_std": 0.09555504471063614, "signal/advantage_std": 0.09555504471063614, "signal/brier_reward/centered_abs_mean": 0.11904580742120743, "signal/brier_reward/group_std_mean": 0.15341890454292298, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011904580891132355, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011904580891132355, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019766898453235628, "signal/confidence_uniqueness_reward/group_std_mean": 0.024970807135105133, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0019766898592934014, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019766898592934014, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15665509104728698, "signal/frontier_coverage_0/group_std_mean": 0.20551926791667938, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022401677910238505, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022401677910238505, "signal/frontier_coverage_1/centered_abs_mean": 0.15665509104728698, "signal/frontier_coverage_1/group_std_mean": 0.20551926791667938, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022401677910238505, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022401677910238505, "signal/frontier_coverage_10/centered_abs_mean": 0.15665509104728698, "signal/frontier_coverage_10/group_std_mean": 0.20551926791667938, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022401677910238505, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022401677910238505, "signal/frontier_coverage_15/centered_abs_mean": 0.1528875708580017, "signal/frontier_coverage_15/group_std_mean": 0.20064776241779328, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002186292293481529, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002186292293481529, "signal/frontier_coverage_20/centered_abs_mean": 0.10197529047727585, "signal/frontier_coverage_20/group_std_mean": 0.13459382504224776, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014582466334104537, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014582466334104537, "signal/frontier_coverage_25/centered_abs_mean": 0.06756449341773987, "signal/frontier_coverage_25/group_std_mean": 0.08749876469373703, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009661722113378346, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009661722113378346, "signal/frontier_coverage_5/centered_abs_mean": 0.15665509104728698, "signal/frontier_coverage_5/group_std_mean": 0.20551926791667938, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022401677910238505, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022401677910238505, "step": 245 }, { "calibration/aurc": 0.20359943680556297, "calibration/batch_distribution_entropy": 0.9231851021411351, "calibration/buffer_distribution_entropy": 0.9563622314749581, "calibration/confidence_entropy": 0.4116085176287439, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.262109375, "calibration/coverage@15%": 0.471484375, "calibration/coverage@20%": 0.588671875, "calibration/coverage@25%": 0.662109375, "calibration/coverage@30%": 0.752734375, "calibration/coverage@5%": 0.07421875, "calibration/ece": 0.09644673492421116, "calibration/mean_confidence": 0.5133434148481435, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 451.8, "completions/max_terminated_length": 451.8, "completions/mean_length": 194.51513671875, "completions/mean_terminated_length": 194.53388671875, "completions/min_length": 78.4, "completions/min_terminated_length": 98.0, "epoch": 0.8, "grad_norm": 0.0010863860370591283, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 842125567.0, "reward": 0.9804993987083435, "reward_std": 0.06534842252731324, "rewards/accuracy_reward": 0.58642578125, "rewards/brier_reward": 0.8142348051071167, "rewards/confidence_uniqueness_reward": 0.9505360841751098, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.12197792679071426, "rewards/frontier_coverage_1": 0.12197792679071426, "rewards/frontier_coverage_10": 0.12197792679071426, "rewards/frontier_coverage_15": 0.11846152395009994, "rewards/frontier_coverage_20": 0.08628106266260147, "rewards/frontier_coverage_25": 0.06666406691074371, "rewards/frontier_coverage_5": 0.12197792679071426, "signal/accuracy_reward/centered_abs_mean": 0.089093017578125, "signal/accuracy_reward/group_std_mean": 0.11905532628297806, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0445465087890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0445465087890625, "signal/advantage_abs_mean": 0.04896164536476135, "signal/advantage_pre_scale_abs_mean": 0.04896164536476135, "signal/advantage_pre_scale_std": 0.09614251106977463, "signal/advantage_std": 0.09614251106977463, "signal/brier_reward/centered_abs_mean": 0.10979892462491989, "signal/brier_reward/group_std_mean": 0.14249781668186187, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010979892686009407, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010979892686009407, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022416341677308084, "signal/confidence_uniqueness_reward/group_std_mean": 0.028693411871790885, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022416341584175826, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022416341584175826, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14336768090724944, "signal/frontier_coverage_0/group_std_mean": 0.18752407133579255, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020501580554991962, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020501580554991962, "signal/frontier_coverage_1/centered_abs_mean": 0.14336768090724944, "signal/frontier_coverage_1/group_std_mean": 0.18752407133579255, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020501580554991962, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020501580554991962, "signal/frontier_coverage_10/centered_abs_mean": 0.14336768090724944, "signal/frontier_coverage_10/group_std_mean": 0.18752407133579255, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020501580554991962, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020501580554991962, "signal/frontier_coverage_15/centered_abs_mean": 0.13639699816703796, "signal/frontier_coverage_15/group_std_mean": 0.17871999144554138, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019504770869389176, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019504770869389176, "signal/frontier_coverage_20/centered_abs_mean": 0.09066965878009796, "signal/frontier_coverage_20/group_std_mean": 0.11953730136156082, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001296576135791838, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001296576135791838, "signal/frontier_coverage_25/centered_abs_mean": 0.060934024304151534, "signal/frontier_coverage_25/group_std_mean": 0.07885423004627228, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008713565533980727, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008713565533980727, "signal/frontier_coverage_5/centered_abs_mean": 0.14336768090724944, "signal/frontier_coverage_5/group_std_mean": 0.18752407133579255, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020501580554991962, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020501580554991962, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4583315773752878, "eval_calibration/batch_distribution_entropy": 0.893029162438776, "eval_calibration/buffer_distribution_entropy": 0.9562239211237429, "eval_calibration/confidence_entropy": 0.40999648241332165, "eval_calibration/coverage@0%": 0.1015625, "eval_calibration/coverage@1%": 0.1015625, "eval_calibration/coverage@10%": 0.1015625, "eval_calibration/coverage@15%": 0.1171875, "eval_calibration/coverage@20%": 0.1484375, "eval_calibration/coverage@25%": 0.1875, "eval_calibration/coverage@30%": 0.1875, "eval_calibration/coverage@5%": 0.1015625, "eval_calibration/ece": 0.21749381230468748, "eval_calibration/mean_confidence": 0.4812561876953125, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 341.5, "eval_completions/max_terminated_length": 341.5, "eval_completions/mean_length": 200.39931106567383, "eval_completions/mean_terminated_length": 200.39931106567383, "eval_completions/min_length": 113.5, "eval_completions/min_terminated_length": 113.5, "eval_loss": 0.0, "eval_num_tokens": 842125567.0, "eval_reward": 0.9045292139053345, "eval_reward_std": 0.23102111369371414, "eval_rewards/accuracy_reward": 0.4375, "eval_rewards/brier_reward": 0.7857947647571564, "eval_rewards/confidence_uniqueness_reward": 0.89599609375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.20588579028844833, "eval_rewards/frontier_coverage_1": 0.20588579028844833, "eval_rewards/frontier_coverage_10": 0.20588579028844833, "eval_rewards/frontier_coverage_15": 0.1928391382098198, "eval_rewards/frontier_coverage_20": 0.13208013586699963, "eval_rewards/frontier_coverage_25": 0.08231428451836109, "eval_rewards/frontier_coverage_5": 0.20588579028844833, "eval_runtime": 18.2572, "eval_samples_per_second": 27.387, "eval_signal/accuracy_reward/centered_abs_mean": 0.478759765625, "eval_signal/accuracy_reward/group_std_mean": 0.49678919464349747, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2393798828125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2393798828125, "eval_signal/advantage_abs_mean": 0.21644001826643944, "eval_signal/advantage_pre_scale_abs_mean": 0.21644001826643944, "eval_signal/advantage_pre_scale_std": 0.22859064117074013, "eval_signal/advantage_std": 0.22859064117074013, "eval_signal/brier_reward/centered_abs_mean": 0.22941264510154724, "eval_signal/brier_reward/group_std_mean": 0.28201356530189514, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022941263858228922, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022941263858228922, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0435028076171875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051835235208272934, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004350280680228025, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004350280680228025, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3821023553609848, "eval_signal/frontier_coverage_0/group_std_mean": 0.4686368927359581, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005464063957333565, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005464063957333565, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3821023553609848, "eval_signal/frontier_coverage_1/group_std_mean": 0.4686368927359581, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005464063957333565, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005464063957333565, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3821023553609848, "eval_signal/frontier_coverage_10/group_std_mean": 0.4686368927359581, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005464063957333565, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005464063957333565, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.35555680841207504, "eval_signal/frontier_coverage_15/group_std_mean": 0.436874620616436, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005084462347440422, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005084462347440422, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.23739305138587952, "eval_signal/frontier_coverage_20/group_std_mean": 0.2956453561782837, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033947205520235, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033947205520235, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.12688638269901276, "eval_signal/frontier_coverage_25/group_std_mean": 0.16197730228304863, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018144752539228648, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018144752539228648, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3821023553609848, "eval_signal/frontier_coverage_5/group_std_mean": 0.4686368927359581, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005464063957333565, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005464063957333565, "eval_steps_per_second": 0.219, "step": 250 }, { "calibration/aurc": 0.23015374464443203, "calibration/batch_distribution_entropy": 0.8576458054480749, "calibration/buffer_distribution_entropy": 0.9553468512920373, "calibration/confidence_entropy": 0.3716785624732463, "calibration/coverage@0%": 0.004296875, "calibration/coverage@1%": 0.046875, "calibration/coverage@10%": 0.16953125, "calibration/coverage@15%": 0.233203125, "calibration/coverage@20%": 0.431640625, "calibration/coverage@25%": 0.6375, "calibration/coverage@30%": 0.790625, "calibration/coverage@5%": 0.11796875, "calibration/ece": 0.14930048014804023, "calibration/mean_confidence": 0.5608904347646698, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 456.8, "completions/max_terminated_length": 456.8, "completions/mean_length": 194.08486328125, "completions/mean_terminated_length": 194.10433349609374, "completions/min_length": 76.8, "completions/min_terminated_length": 96.2, "epoch": 0.816, "grad_norm": 0.0009816524107009172, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 859212164.0, "reward": 0.9736658692359924, "reward_std": 0.06884004175662994, "rewards/accuracy_reward": 0.58291015625, "rewards/brier_reward": 0.7866194605827331, "rewards/confidence_uniqueness_reward": 0.9460584640502929, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.0991329938173294, "rewards/frontier_coverage_1": 0.0991329938173294, "rewards/frontier_coverage_10": 0.0991329938173294, "rewards/frontier_coverage_15": 0.0950236402451992, "rewards/frontier_coverage_20": 0.07604901492595673, "rewards/frontier_coverage_25": 0.061194049566984175, "rewards/frontier_coverage_5": 0.0991329938173294, "signal/accuracy_reward/centered_abs_mean": 0.088104248046875, "signal/accuracy_reward/group_std_mean": 0.1207100659608841, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0440521240234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0440521240234375, "signal/advantage_abs_mean": 0.05134280025959015, "signal/advantage_pre_scale_abs_mean": 0.05134280025959015, "signal/advantage_pre_scale_std": 0.09874342083930969, "signal/advantage_std": 0.09874342083930969, "signal/brier_reward/centered_abs_mean": 0.12759677469730377, "signal/brier_reward/group_std_mean": 0.16383454203605652, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012759677693247795, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012759677693247795, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02629918046295643, "signal/confidence_uniqueness_reward/group_std_mean": 0.03357274830341339, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026299181394279004, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026299181394279004, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15177842378616332, "signal/frontier_coverage_0/group_std_mean": 0.19879043400287627, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021704314742237328, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021704314742237328, "signal/frontier_coverage_1/centered_abs_mean": 0.15177842378616332, "signal/frontier_coverage_1/group_std_mean": 0.19879043400287627, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021704314742237328, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021704314742237328, "signal/frontier_coverage_10/centered_abs_mean": 0.15177842378616332, "signal/frontier_coverage_10/group_std_mean": 0.19879043400287627, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021704314742237328, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021704314742237328, "signal/frontier_coverage_15/centered_abs_mean": 0.14039504528045654, "signal/frontier_coverage_15/group_std_mean": 0.18412175476551057, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00200764921028167, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00200764921028167, "signal/frontier_coverage_20/centered_abs_mean": 0.0951578825712204, "signal/frontier_coverage_20/group_std_mean": 0.12507294863462448, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013607577420771122, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013607577420771122, "signal/frontier_coverage_25/centered_abs_mean": 0.06570944413542748, "signal/frontier_coverage_25/group_std_mean": 0.08424745202064514, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000939645036123693, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000939645036123693, "signal/frontier_coverage_5/centered_abs_mean": 0.15177842378616332, "signal/frontier_coverage_5/group_std_mean": 0.19879043400287627, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021704314742237328, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021704314742237328, "step": 255 }, { "calibration/aurc": 0.28512608942683115, "calibration/batch_distribution_entropy": 0.9006837544803672, "calibration/buffer_distribution_entropy": 0.9535822255765938, "calibration/confidence_entropy": 0.40338913422993167, "calibration/coverage@0%": 0.02890625, "calibration/coverage@1%": 0.02890625, "calibration/coverage@10%": 0.21484375, "calibration/coverage@15%": 0.26953125, "calibration/coverage@20%": 0.33671875, "calibration/coverage@25%": 0.402734375, "calibration/coverage@30%": 0.544140625, "calibration/coverage@5%": 0.1546875, "calibration/ece": 0.13350636173462058, "calibration/mean_confidence": 0.5006771214138114, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 439.2, "completions/max_terminated_length": 439.2, "completions/mean_length": 195.5736328125, "completions/mean_terminated_length": 195.5927001953125, "completions/min_length": 76.8, "completions/min_terminated_length": 96.0, "epoch": 0.832, "grad_norm": 0.0008164091850630939, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 876223190.0, "reward": 0.9648186922073364, "reward_std": 0.0649384766817093, "rewards/accuracy_reward": 0.553125, "rewards/brier_reward": 0.8103304743766785, "rewards/confidence_uniqueness_reward": 0.9441303372383117, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.14813297837972642, "rewards/frontier_coverage_1": 0.14813297837972642, "rewards/frontier_coverage_10": 0.14813297837972642, "rewards/frontier_coverage_15": 0.13433591276407242, "rewards/frontier_coverage_20": 0.09811097532510757, "rewards/frontier_coverage_25": 0.07424705252051353, "rewards/frontier_coverage_5": 0.14813297837972642, "signal/accuracy_reward/centered_abs_mean": 0.0893310546875, "signal/accuracy_reward/group_std_mean": 0.12017730772495269, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04466552734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04466552734375, "signal/advantage_abs_mean": 0.048449646681547165, "signal/advantage_pre_scale_abs_mean": 0.048449646681547165, "signal/advantage_pre_scale_std": 0.09641486257314683, "signal/advantage_std": 0.09641486257314683, "signal/brier_reward/centered_abs_mean": 0.11072713136672974, "signal/brier_reward/group_std_mean": 0.1412588134407997, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011072713136672973, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011072713136672973, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02563716545701027, "signal/confidence_uniqueness_reward/group_std_mean": 0.03275141529738903, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025637165643274786, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025637165643274786, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14630222022533418, "signal/frontier_coverage_0/group_std_mean": 0.189518603682518, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002092121751047671, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002092121751047671, "signal/frontier_coverage_1/centered_abs_mean": 0.14630222022533418, "signal/frontier_coverage_1/group_std_mean": 0.189518603682518, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002092121751047671, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002092121751047671, "signal/frontier_coverage_10/centered_abs_mean": 0.14630222022533418, "signal/frontier_coverage_10/group_std_mean": 0.189518603682518, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002092121751047671, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002092121751047671, "signal/frontier_coverage_15/centered_abs_mean": 0.1310984805226326, "signal/frontier_coverage_15/group_std_mean": 0.16997389793395995, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018747082212939858, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018747082212939858, "signal/frontier_coverage_20/centered_abs_mean": 0.09214921295642853, "signal/frontier_coverage_20/group_std_mean": 0.11910515576601029, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013177337590605021, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013177337590605021, "signal/frontier_coverage_25/centered_abs_mean": 0.062527135014534, "signal/frontier_coverage_25/group_std_mean": 0.07901622802019119, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008941379957832396, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008941379957832396, "signal/frontier_coverage_5/centered_abs_mean": 0.14630222022533418, "signal/frontier_coverage_5/group_std_mean": 0.189518603682518, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002092121751047671, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002092121751047671, "step": 260 }, { "calibration/aurc": 0.30311595878798575, "calibration/batch_distribution_entropy": 0.9193359670756618, "calibration/buffer_distribution_entropy": 0.9533215398340383, "calibration/confidence_entropy": 0.4322883837207173, "calibration/coverage@0%": 0.023046875, "calibration/coverage@1%": 0.023046875, "calibration/coverage@10%": 0.20078125, "calibration/coverage@15%": 0.271484375, "calibration/coverage@20%": 0.429296875, "calibration/coverage@25%": 0.497265625, "calibration/coverage@30%": 0.5578125, "calibration/coverage@5%": 0.125, "calibration/ece": 0.1747222471321523, "calibration/mean_confidence": 0.5455925214912242, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 425.4, "completions/max_terminated_length": 425.4, "completions/mean_length": 195.658984375, "completions/mean_terminated_length": 195.67786865234376, "completions/min_length": 79.8, "completions/min_terminated_length": 99.0, "epoch": 0.848, "grad_norm": 0.0009759237291291356, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 893241106.0, "reward": 0.9537703156471252, "reward_std": 0.06543072313070297, "rewards/accuracy_reward": 0.53271484375, "rewards/brier_reward": 0.7992340207099915, "rewards/confidence_uniqueness_reward": 0.9499642372131347, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.14569487571716308, "rewards/frontier_coverage_1": 0.14569487571716308, "rewards/frontier_coverage_10": 0.14569487571716308, "rewards/frontier_coverage_15": 0.13636898696422578, "rewards/frontier_coverage_20": 0.09246301501989365, "rewards/frontier_coverage_25": 0.06544329449534417, "rewards/frontier_coverage_5": 0.14569487571716308, "signal/accuracy_reward/centered_abs_mean": 0.082806396484375, "signal/accuracy_reward/group_std_mean": 0.11064963936805725, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0414031982421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0414031982421875, "signal/advantage_abs_mean": 0.0498686358332634, "signal/advantage_pre_scale_abs_mean": 0.0498686358332634, "signal/advantage_pre_scale_std": 0.09574073255062103, "signal/advantage_std": 0.09574073255062103, "signal/brier_reward/centered_abs_mean": 0.11890813261270523, "signal/brier_reward/group_std_mean": 0.1514558345079422, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011890813149511814, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011890813149511814, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022554631531238555, "signal/confidence_uniqueness_reward/group_std_mean": 0.028490035980939864, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022554632276296617, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022554632276296617, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14929589331150056, "signal/frontier_coverage_0/group_std_mean": 0.19481739699840545, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002134931227192283, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002134931227192283, "signal/frontier_coverage_1/centered_abs_mean": 0.14929589331150056, "signal/frontier_coverage_1/group_std_mean": 0.19481739699840545, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002134931227192283, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002134931227192283, "signal/frontier_coverage_10/centered_abs_mean": 0.14929589331150056, "signal/frontier_coverage_10/group_std_mean": 0.19481739699840545, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002134931227192283, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002134931227192283, "signal/frontier_coverage_15/centered_abs_mean": 0.1377663642168045, "signal/frontier_coverage_15/group_std_mean": 0.1799382120370865, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019700590055435896, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019700590055435896, "signal/frontier_coverage_20/centered_abs_mean": 0.09448865950107574, "signal/frontier_coverage_20/group_std_mean": 0.12360241562128067, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001351187820546329, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001351187820546329, "signal/frontier_coverage_25/centered_abs_mean": 0.061427921056747437, "signal/frontier_coverage_25/group_std_mean": 0.07933543026447296, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008784192497842014, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008784192497842014, "signal/frontier_coverage_5/centered_abs_mean": 0.14929589331150056, "signal/frontier_coverage_5/group_std_mean": 0.19481739699840545, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002134931227192283, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002134931227192283, "step": 265 }, { "calibration/aurc": 0.2665301529748049, "calibration/batch_distribution_entropy": 0.9337003352908582, "calibration/buffer_distribution_entropy": 0.9544330118977861, "calibration/confidence_entropy": 0.4758372413036889, "calibration/coverage@0%": 0.009765625, "calibration/coverage@1%": 0.009765625, "calibration/coverage@10%": 0.1109375, "calibration/coverage@15%": 0.23359375, "calibration/coverage@20%": 0.326953125, "calibration/coverage@25%": 0.430078125, "calibration/coverage@30%": 0.547265625, "calibration/coverage@5%": 0.053125, "calibration/ece": 0.13886920481639256, "calibration/mean_confidence": 0.5781809774169434, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 674.8, "completions/max_terminated_length": 674.8, "completions/mean_length": 196.86259765625, "completions/mean_terminated_length": 196.88185119628906, "completions/min_length": 80.8, "completions/min_terminated_length": 100.8, "epoch": 0.864, "grad_norm": 0.001146928290836513, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 910243795.0, "reward": 0.9781444787979126, "reward_std": 0.06458824276924133, "rewards/accuracy_reward": 0.59140625, "rewards/brier_reward": 0.7921436786651611, "rewards/confidence_uniqueness_reward": 0.9507281303405761, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.09182494133710861, "rewards/frontier_coverage_1": 0.09182494133710861, "rewards/frontier_coverage_10": 0.09182494133710861, "rewards/frontier_coverage_15": 0.0870552383363247, "rewards/frontier_coverage_20": 0.06757164672017098, "rewards/frontier_coverage_25": 0.051709264516830444, "rewards/frontier_coverage_5": 0.09182494133710861, "signal/accuracy_reward/centered_abs_mean": 0.0930908203125, "signal/accuracy_reward/group_std_mean": 0.12001070380210876, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04654541015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04654541015625, "signal/advantage_abs_mean": 0.04986320808529854, "signal/advantage_pre_scale_abs_mean": 0.04986320808529854, "signal/advantage_pre_scale_std": 0.09519556760787964, "signal/advantage_std": 0.09519556760787964, "signal/brier_reward/centered_abs_mean": 0.11273131370544434, "signal/brier_reward/group_std_mean": 0.14456582069396973, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011273131892085075, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011273131892085075, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021527956053614617, "signal/confidence_uniqueness_reward/group_std_mean": 0.027080774307250977, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002152795670554042, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002152795670554042, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15594776272773742, "signal/frontier_coverage_0/group_std_mean": 0.20174038112163545, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002230052975937724, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002230052975937724, "signal/frontier_coverage_1/centered_abs_mean": 0.15594776272773742, "signal/frontier_coverage_1/group_std_mean": 0.20174038112163545, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002230052975937724, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002230052975937724, "signal/frontier_coverage_10/centered_abs_mean": 0.15594776272773742, "signal/frontier_coverage_10/group_std_mean": 0.20174038112163545, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002230052975937724, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002230052975937724, "signal/frontier_coverage_15/centered_abs_mean": 0.1416664719581604, "signal/frontier_coverage_15/group_std_mean": 0.18316128849983215, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020258305361494423, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020258305361494423, "signal/frontier_coverage_20/centered_abs_mean": 0.09624775648117065, "signal/frontier_coverage_20/group_std_mean": 0.12451988160610199, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013763429131358861, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013763429131358861, "signal/frontier_coverage_25/centered_abs_mean": 0.059066733717918395, "signal/frontier_coverage_25/group_std_mean": 0.0757571741938591, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008446542662568391, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008446542662568391, "signal/frontier_coverage_5/centered_abs_mean": 0.15594776272773742, "signal/frontier_coverage_5/group_std_mean": 0.20174038112163545, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002230052975937724, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002230052975937724, "step": 270 }, { "calibration/aurc": 0.3452574331468948, "calibration/batch_distribution_entropy": 0.9465358312516123, "calibration/buffer_distribution_entropy": 0.9557025260532088, "calibration/confidence_entropy": 0.44704613050715925, "calibration/coverage@0%": 0.019534307729941292, "calibration/coverage@1%": 0.019534307729941292, "calibration/coverage@10%": 0.05275807240704501, "calibration/coverage@15%": 0.07698523116438356, "calibration/coverage@20%": 0.17000596257338552, "calibration/coverage@25%": 0.32013973825831704, "calibration/coverage@30%": 0.4193860078277886, "calibration/coverage@5%": 0.029319043542074364, "calibration/ece": 0.14351819665830193, "calibration/mean_confidence": 0.48057255943470245, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 477.8, "completions/max_terminated_length": 477.8, "completions/mean_length": 195.49287109375, "completions/mean_terminated_length": 195.512158203125, "completions/min_length": 78.6, "completions/min_terminated_length": 98.2, "epoch": 0.88, "grad_norm": 0.0009138612658716738, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 927392714.0, "reward": 0.9463012337684631, "reward_std": 0.06559450551867485, "rewards/accuracy_reward": 0.51484375, "rewards/brier_reward": 0.8050420165061951, "rewards/confidence_uniqueness_reward": 0.9523642301559448, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.1560472682118416, "rewards/frontier_coverage_1": 0.1560472682118416, "rewards/frontier_coverage_10": 0.1560472682118416, "rewards/frontier_coverage_15": 0.13759579956531526, "rewards/frontier_coverage_20": 0.09614049047231674, "rewards/frontier_coverage_25": 0.06428121700882912, "rewards/frontier_coverage_5": 0.1560472682118416, "signal/accuracy_reward/centered_abs_mean": 0.09307861328125, "signal/accuracy_reward/group_std_mean": 0.1208904430270195, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046539306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.046539306640625, "signal/advantage_abs_mean": 0.050657791644334794, "signal/advantage_pre_scale_abs_mean": 0.050657791644334794, "signal/advantage_pre_scale_std": 0.09581429213285446, "signal/advantage_std": 0.09581429213285446, "signal/brier_reward/centered_abs_mean": 0.10857920050621032, "signal/brier_reward/group_std_mean": 0.13950212001800538, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010857920348644256, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010857920348644256, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01955595873296261, "signal/confidence_uniqueness_reward/group_std_mean": 0.02445173226296902, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0019555958919227124, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019555958919227124, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15932937264442443, "signal/frontier_coverage_0/group_std_mean": 0.20401280820369722, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022784100845456125, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022784100845456125, "signal/frontier_coverage_1/centered_abs_mean": 0.15932937264442443, "signal/frontier_coverage_1/group_std_mean": 0.20401280820369722, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022784100845456125, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022784100845456125, "signal/frontier_coverage_10/centered_abs_mean": 0.15932937264442443, "signal/frontier_coverage_10/group_std_mean": 0.20401280820369722, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022784100845456125, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022784100845456125, "signal/frontier_coverage_15/centered_abs_mean": 0.13971571624279022, "signal/frontier_coverage_15/group_std_mean": 0.17929893136024475, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019979347474873067, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019979347474873067, "signal/frontier_coverage_20/centered_abs_mean": 0.09661854058504105, "signal/frontier_coverage_20/group_std_mean": 0.12456222176551819, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013816451421007514, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013816451421007514, "signal/frontier_coverage_25/centered_abs_mean": 0.05862758159637451, "signal/frontier_coverage_25/group_std_mean": 0.07523611336946487, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008383744047023356, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008383744047023356, "signal/frontier_coverage_5/centered_abs_mean": 0.15932937264442443, "signal/frontier_coverage_5/group_std_mean": 0.20401280820369722, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022784100845456125, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022784100845456125, "step": 275 }, { "calibration/aurc": 0.3462076894170047, "calibration/batch_distribution_entropy": 0.9425424465087536, "calibration/buffer_distribution_entropy": 0.9567942420251466, "calibration/confidence_entropy": 0.4741509430206251, "calibration/coverage@0%": 0.027734375, "calibration/coverage@1%": 0.027734375, "calibration/coverage@10%": 0.08671875, "calibration/coverage@15%": 0.146484375, "calibration/coverage@20%": 0.20078125, "calibration/coverage@25%": 0.3234375, "calibration/coverage@30%": 0.47890625, "calibration/coverage@5%": 0.03359375, "calibration/ece": 0.14865247684944954, "calibration/mean_confidence": 0.5141867801386891, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 458.6, "completions/max_terminated_length": 458.6, "completions/mean_length": 199.56806640625, "completions/mean_terminated_length": 199.58777465820313, "completions/min_length": 80.6, "completions/min_terminated_length": 98.8, "epoch": 0.896, "grad_norm": 0.001016242429614067, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 944547139.0, "reward": 0.9603770971298218, "reward_std": 0.06262253299355507, "rewards/accuracy_reward": 0.5482421875, "rewards/brier_reward": 0.8026605606079101, "rewards/confidence_uniqueness_reward": 0.9549548506736756, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.12388900071382522, "rewards/frontier_coverage_1": 0.12388900071382522, "rewards/frontier_coverage_10": 0.12388900071382522, "rewards/frontier_coverage_15": 0.10794235169887542, "rewards/frontier_coverage_20": 0.07755922675132751, "rewards/frontier_coverage_25": 0.056235866993665694, "rewards/frontier_coverage_5": 0.12388900071382522, "signal/accuracy_reward/centered_abs_mean": 0.0889404296875, "signal/accuracy_reward/group_std_mean": 0.12086254060268402, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04447021484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04447021484375, "signal/advantage_abs_mean": 0.0463208869099617, "signal/advantage_pre_scale_abs_mean": 0.0463208869099617, "signal/advantage_pre_scale_std": 0.09083856195211411, "signal/advantage_std": 0.09083856195211411, "signal/brier_reward/centered_abs_mean": 0.10326587110757827, "signal/brier_reward/group_std_mean": 0.13239023983478546, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010326587595045567, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010326587595045567, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01764247938990593, "signal/confidence_uniqueness_reward/group_std_mean": 0.022220425307750702, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017642479855567218, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017642479855567218, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.150966015458107, "signal/frontier_coverage_0/group_std_mean": 0.19346502125263215, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002158814016729593, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002158814016729593, "signal/frontier_coverage_1/centered_abs_mean": 0.150966015458107, "signal/frontier_coverage_1/group_std_mean": 0.19346502125263215, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002158814016729593, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002158814016729593, "signal/frontier_coverage_10/centered_abs_mean": 0.150966015458107, "signal/frontier_coverage_10/group_std_mean": 0.19346502125263215, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002158814016729593, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002158814016729593, "signal/frontier_coverage_15/centered_abs_mean": 0.12611357122659683, "signal/frontier_coverage_15/group_std_mean": 0.161882221698761, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018034240463748574, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018034240463748574, "signal/frontier_coverage_20/centered_abs_mean": 0.08738774359226227, "signal/frontier_coverage_20/group_std_mean": 0.11280497461557389, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001249644672498107, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001249644672498107, "signal/frontier_coverage_25/centered_abs_mean": 0.053803355991840364, "signal/frontier_coverage_25/group_std_mean": 0.06944843530654907, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007693879655562341, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007693879655562341, "signal/frontier_coverage_5/centered_abs_mean": 0.150966015458107, "signal/frontier_coverage_5/group_std_mean": 0.19346502125263215, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002158814016729593, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002158814016729593, "step": 280 }, { "calibration/aurc": 0.35427870476059475, "calibration/batch_distribution_entropy": 0.9575071938085074, "calibration/buffer_distribution_entropy": 0.9583187097828064, "calibration/confidence_entropy": 0.49101955323962176, "calibration/coverage@0%": 0.020703125, "calibration/coverage@1%": 0.020703125, "calibration/coverage@10%": 0.096875, "calibration/coverage@15%": 0.135546875, "calibration/coverage@20%": 0.290625, "calibration/coverage@25%": 0.37890625, "calibration/coverage@30%": 0.47890625, "calibration/coverage@5%": 0.053125, "calibration/ece": 0.14715639573538566, "calibration/mean_confidence": 0.4909093624526076, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 503.4, "completions/max_terminated_length": 503.4, "completions/mean_length": 204.24189453125, "completions/mean_terminated_length": 204.30109558105468, "completions/min_length": 43.8, "completions/min_terminated_length": 105.0, "epoch": 0.912, "grad_norm": 0.0006890616496093571, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 961689872.0, "reward": 0.960372805595398, "reward_std": 0.06250972747802734, "rewards/accuracy_reward": 0.5474609375, "rewards/brier_reward": 0.8091128706932068, "rewards/confidence_uniqueness_reward": 0.9568856477737426, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.1184326808899641, "rewards/frontier_coverage_1": 0.1184326808899641, "rewards/frontier_coverage_10": 0.1184326808899641, "rewards/frontier_coverage_15": 0.10469010137021542, "rewards/frontier_coverage_20": 0.07688896842300892, "rewards/frontier_coverage_25": 0.05720534510910511, "rewards/frontier_coverage_5": 0.1184326808899641, "signal/accuracy_reward/centered_abs_mean": 0.08143310546875, "signal/accuracy_reward/group_std_mean": 0.11135471612215042, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040716552734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.040716552734375, "signal/advantage_abs_mean": 0.04637853130698204, "signal/advantage_pre_scale_abs_mean": 0.04637853130698204, "signal/advantage_pre_scale_std": 0.08917968124151229, "signal/advantage_std": 0.08917968124151229, "signal/brier_reward/centered_abs_mean": 0.10574377328157425, "signal/brier_reward/group_std_mean": 0.13551586270332336, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01057437751442194, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01057437751442194, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01588448788970709, "signal/confidence_uniqueness_reward/group_std_mean": 0.02095335051417351, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015884488122537733, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015884488122537733, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.15329268872737883, "signal/frontier_coverage_0/group_std_mean": 0.1948721706867218, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021920854225754736, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021920854225754736, "signal/frontier_coverage_1/centered_abs_mean": 0.15329268872737883, "signal/frontier_coverage_1/group_std_mean": 0.1948721706867218, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021920854225754736, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021920854225754736, "signal/frontier_coverage_10/centered_abs_mean": 0.15329268872737883, "signal/frontier_coverage_10/group_std_mean": 0.1948721706867218, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021920854225754736, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021920854225754736, "signal/frontier_coverage_15/centered_abs_mean": 0.12959499955177306, "signal/frontier_coverage_15/group_std_mean": 0.16456757485866547, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00185320854652673, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00185320854652673, "signal/frontier_coverage_20/centered_abs_mean": 0.08761304467916489, "signal/frontier_coverage_20/group_std_mean": 0.11146515905857086, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012528665363788604, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012528665363788604, "signal/frontier_coverage_25/centered_abs_mean": 0.05668332800269127, "signal/frontier_coverage_25/group_std_mean": 0.07238757461309434, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008105716085992753, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008105716085992753, "signal/frontier_coverage_5/centered_abs_mean": 0.15329268872737883, "signal/frontier_coverage_5/group_std_mean": 0.1948721706867218, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021920854225754736, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021920854225754736, "step": 285 }, { "calibration/aurc": 0.3856348088878695, "calibration/batch_distribution_entropy": 0.9619417296860739, "calibration/buffer_distribution_entropy": 0.9620296197016186, "calibration/confidence_entropy": 0.5035797239780982, "calibration/coverage@0%": 0.009386466487279843, "calibration/coverage@1%": 0.009386466487279843, "calibration/coverage@10%": 0.028926125244618395, "calibration/coverage@15%": 0.044947101272015656, "calibration/coverage@20%": 0.059801553326810176, "calibration/coverage@25%": 0.19144982265166338, "calibration/coverage@30%": 0.3309556934931507, "calibration/coverage@5%": 0.01173480308219178, "calibration/ece": 0.12592002169075053, "calibration/mean_confidence": 0.5046425573200831, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 416.6, "completions/max_terminated_length": 416.6, "completions/mean_length": 204.05205078125, "completions/mean_terminated_length": 204.11180114746094, "completions/min_length": 40.4, "completions/min_terminated_length": 101.0, "epoch": 0.928, "grad_norm": 0.0007080600480549037, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 978806181.0, "reward": 0.949990451335907, "reward_std": 0.06062187701463699, "rewards/accuracy_reward": 0.528125, "rewards/brier_reward": 0.7971256971359253, "rewards/confidence_uniqueness_reward": 0.9547683119773864, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.12781327366828918, "rewards/frontier_coverage_1": 0.12781327366828918, "rewards/frontier_coverage_10": 0.12781327366828918, "rewards/frontier_coverage_15": 0.11005319505929947, "rewards/frontier_coverage_20": 0.07983717322349548, "rewards/frontier_coverage_25": 0.060046466439962386, "rewards/frontier_coverage_5": 0.12781327366828918, "signal/accuracy_reward/centered_abs_mean": 0.076708984375, "signal/accuracy_reward/group_std_mean": 0.10556017011404037, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0383544921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0383544921875, "signal/advantage_abs_mean": 0.04432180598378181, "signal/advantage_pre_scale_abs_mean": 0.04432180598378181, "signal/advantage_pre_scale_std": 0.08894449770450592, "signal/advantage_std": 0.08894449770450592, "signal/brier_reward/centered_abs_mean": 0.10374047160148621, "signal/brier_reward/group_std_mean": 0.13460810035467147, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010374047234654427, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010374047234654427, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017230465635657312, "signal/confidence_uniqueness_reward/group_std_mean": 0.022360032051801683, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001723046530969441, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001723046530969441, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.141362664103508, "signal/frontier_coverage_0/group_std_mean": 0.1842469871044159, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020214861258864405, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020214861258864405, "signal/frontier_coverage_1/centered_abs_mean": 0.141362664103508, "signal/frontier_coverage_1/group_std_mean": 0.1842469871044159, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020214861258864405, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020214861258864405, "signal/frontier_coverage_10/centered_abs_mean": 0.141362664103508, "signal/frontier_coverage_10/group_std_mean": 0.1842469871044159, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020214861258864405, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020214861258864405, "signal/frontier_coverage_15/centered_abs_mean": 0.11770967096090316, "signal/frontier_coverage_15/group_std_mean": 0.15376219451427459, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016832482069730759, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016832482069730759, "signal/frontier_coverage_20/centered_abs_mean": 0.07940128147602081, "signal/frontier_coverage_20/group_std_mean": 0.10401753634214402, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011354383546859025, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011354383546859025, "signal/frontier_coverage_25/centered_abs_mean": 0.053226197510957717, "signal/frontier_coverage_25/group_std_mean": 0.06920340955257416, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007611346081830561, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007611346081830561, "signal/frontier_coverage_5/centered_abs_mean": 0.141362664103508, "signal/frontier_coverage_5/group_std_mean": 0.1842469871044159, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020214861258864405, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020214861258864405, "step": 290 }, { "calibration/aurc": 0.25053891809513856, "calibration/batch_distribution_entropy": 0.9541330489936669, "calibration/buffer_distribution_entropy": 0.9650525357181208, "calibration/confidence_entropy": 0.4900174379706102, "calibration/coverage@0%": 0.0328125, "calibration/coverage@1%": 0.0328125, "calibration/coverage@10%": 0.205859375, "calibration/coverage@15%": 0.31953125, "calibration/coverage@20%": 0.42734375, "calibration/coverage@25%": 0.519921875, "calibration/coverage@30%": 0.601171875, "calibration/coverage@5%": 0.11640625, "calibration/ece": 0.08060715323311354, "calibration/mean_confidence": 0.4891742077465079, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 207.48515625, "completions/mean_terminated_length": 207.48515625, "completions/min_length": 105.6, "completions/min_terminated_length": 105.6, "epoch": 0.944, "grad_norm": 0.0009179720655083656, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 995906253.0, "reward": 0.9544263243675232, "reward_std": 0.0705165296792984, "rewards/accuracy_reward": 0.5337890625, "rewards/brier_reward": 0.8030768990516662, "rewards/confidence_uniqueness_reward": 0.9553874850273132, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.1375230610370636, "rewards/frontier_coverage_1": 0.1375230610370636, "rewards/frontier_coverage_10": 0.13709916770458222, "rewards/frontier_coverage_15": 0.12249395102262498, "rewards/frontier_coverage_20": 0.08775933682918549, "rewards/frontier_coverage_25": 0.06064917892217636, "rewards/frontier_coverage_5": 0.1375230610370636, "signal/accuracy_reward/centered_abs_mean": 0.10455322265625, "signal/accuracy_reward/group_std_mean": 0.13749758303165435, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052276611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052276611328125, "signal/advantage_abs_mean": 0.05363398566842079, "signal/advantage_pre_scale_abs_mean": 0.05363398566842079, "signal/advantage_pre_scale_std": 0.10147667974233628, "signal/advantage_std": 0.10147667974233628, "signal/brier_reward/centered_abs_mean": 0.1024449646472931, "signal/brier_reward/group_std_mean": 0.13244157880544663, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01024449672549963, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01024449672549963, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016994761675596236, "signal/confidence_uniqueness_reward/group_std_mean": 0.021610427275300027, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0016994762001559139, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016994762001559139, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.15415203273296357, "signal/frontier_coverage_0/group_std_mean": 0.1998533695936203, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022043741773813963, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022043741773813963, "signal/frontier_coverage_1/centered_abs_mean": 0.15415203273296357, "signal/frontier_coverage_1/group_std_mean": 0.1998533695936203, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022043741773813963, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022043741773813963, "signal/frontier_coverage_10/centered_abs_mean": 0.1532199949026108, "signal/frontier_coverage_10/group_std_mean": 0.19871186316013337, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002191046020016074, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002191046020016074, "signal/frontier_coverage_15/centered_abs_mean": 0.13066325932741166, "signal/frontier_coverage_15/group_std_mean": 0.16969827115535735, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018684846349060536, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018684846349060536, "signal/frontier_coverage_20/centered_abs_mean": 0.08451437950134277, "signal/frontier_coverage_20/group_std_mean": 0.11034028381109237, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012085556285455824, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012085556285455824, "signal/frontier_coverage_25/centered_abs_mean": 0.05478915497660637, "signal/frontier_coverage_25/group_std_mean": 0.07099926471710205, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007834849297069013, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007834849297069013, "signal/frontier_coverage_5/centered_abs_mean": 0.15415203273296357, "signal/frontier_coverage_5/group_std_mean": 0.1998533695936203, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022043741773813963, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022043741773813963, "step": 295 }, { "calibration/aurc": 0.32084141990480985, "calibration/batch_distribution_entropy": 0.9631872285829151, "calibration/buffer_distribution_entropy": 0.9660335700291272, "calibration/confidence_entropy": 0.4529352593268487, "calibration/coverage@0%": 0.007421875, "calibration/coverage@1%": 0.007421875, "calibration/coverage@10%": 0.129296875, "calibration/coverage@15%": 0.28828125, "calibration/coverage@20%": 0.334375, "calibration/coverage@25%": 0.373046875, "calibration/coverage@30%": 0.42578125, "calibration/coverage@5%": 0.0359375, "calibration/ece": 0.1582392307355111, "calibration/mean_confidence": 0.5463262394170487, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 470.2, "completions/max_terminated_length": 470.2, "completions/mean_length": 211.44248046875, "completions/mean_terminated_length": 211.4843017578125, "completions/min_length": 62.0, "completions/min_terminated_length": 101.8, "epoch": 0.96, "grad_norm": 0.0007353639230132103, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 1013011744.0, "reward": 0.9526812791824341, "reward_std": 0.05707969143986702, "rewards/accuracy_reward": 0.52734375, "rewards/brier_reward": 0.8101108074188232, "rewards/confidence_uniqueness_reward": 0.956527829170227, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.14714392870664597, "rewards/frontier_coverage_1": 0.14714392870664597, "rewards/frontier_coverage_10": 0.1471204236149788, "rewards/frontier_coverage_15": 0.1279465898871422, "rewards/frontier_coverage_20": 0.0889292061328888, "rewards/frontier_coverage_25": 0.06472631767392159, "rewards/frontier_coverage_5": 0.14714392870664597, "signal/accuracy_reward/centered_abs_mean": 0.076416015625, "signal/accuracy_reward/group_std_mean": 0.1025225818157196, "signal/accuracy_reward/group_zero_std_frac": 0.703125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0382080078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0382080078125, "signal/advantage_abs_mean": 0.04289043098688126, "signal/advantage_pre_scale_abs_mean": 0.04289043098688126, "signal/advantage_pre_scale_std": 0.08751718997955323, "signal/advantage_std": 0.08751718997955323, "signal/brier_reward/centered_abs_mean": 0.09678200632333755, "signal/brier_reward/group_std_mean": 0.12593707144260408, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.009678200632333756, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009678200632333756, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017707385867834092, "signal/confidence_uniqueness_reward/group_std_mean": 0.022501471638679504, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001770738698542118, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001770738698542118, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.13480642139911653, "signal/frontier_coverage_0/group_std_mean": 0.17521958649158478, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019277318846434356, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019277318846434356, "signal/frontier_coverage_1/centered_abs_mean": 0.13480642139911653, "signal/frontier_coverage_1/group_std_mean": 0.17521958649158478, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019277318846434356, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019277318846434356, "signal/frontier_coverage_10/centered_abs_mean": 0.1338825672864914, "signal/frontier_coverage_10/group_std_mean": 0.17401980459690095, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019145207479596138, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019145207479596138, "signal/frontier_coverage_15/centered_abs_mean": 0.11472393870353699, "signal/frontier_coverage_15/group_std_mean": 0.14965740144252776, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016405523056164384, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016405523056164384, "signal/frontier_coverage_20/centered_abs_mean": 0.07549109011888504, "signal/frontier_coverage_20/group_std_mean": 0.09850892573595046, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010795225854963065, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010795225854963065, "signal/frontier_coverage_25/centered_abs_mean": 0.05492957755923271, "signal/frontier_coverage_25/group_std_mean": 0.07022278383374214, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007854929543100297, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007854929543100297, "signal/frontier_coverage_5/centered_abs_mean": 0.13480642139911653, "signal/frontier_coverage_5/group_std_mean": 0.17521958649158478, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019277318846434356, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019277318846434356, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.43207962981231096, "eval_calibration/batch_distribution_entropy": 0.9353238475943706, "eval_calibration/buffer_distribution_entropy": 0.9659055969626256, "eval_calibration/confidence_entropy": 0.4578375000571542, "eval_calibration/coverage@0%": 0.09375, "eval_calibration/coverage@1%": 0.09375, "eval_calibration/coverage@10%": 0.109375, "eval_calibration/coverage@15%": 0.125, "eval_calibration/coverage@20%": 0.1328125, "eval_calibration/coverage@25%": 0.171875, "eval_calibration/coverage@30%": 0.1796875, "eval_calibration/coverage@5%": 0.09375, "eval_calibration/ece": 0.19972512714813934, "eval_calibration/mean_confidence": 0.4987985646481393, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 403.5, "eval_completions/max_terminated_length": 403.5, "eval_completions/mean_length": 216.9659881591797, "eval_completions/mean_terminated_length": 216.9659881591797, "eval_completions/min_length": 116.25, "eval_completions/min_terminated_length": 116.25, "eval_loss": 0.0, "eval_num_tokens": 1013011744.0, "eval_reward": 0.9047603160142899, "eval_reward_std": 0.2327863685786724, "eval_rewards/accuracy_reward": 0.435546875, "eval_rewards/brier_reward": 0.8014965802431107, "eval_rewards/confidence_uniqueness_reward": 0.90576171875, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.1972401924431324, "eval_rewards/frontier_coverage_1": 0.1972401924431324, "eval_rewards/frontier_coverage_10": 0.1972401924431324, "eval_rewards/frontier_coverage_15": 0.1668083593249321, "eval_rewards/frontier_coverage_20": 0.11011006683111191, "eval_rewards/frontier_coverage_25": 0.07125817239284515, "eval_rewards/frontier_coverage_5": 0.1972401924431324, "eval_runtime": 20.6578, "eval_samples_per_second": 24.204, "eval_signal/accuracy_reward/centered_abs_mean": 0.4749755859375, "eval_signal/accuracy_reward/group_std_mean": 0.49479666352272034, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23748779296875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23748779296875, "eval_signal/advantage_abs_mean": 0.21877508983016014, "eval_signal/advantage_pre_scale_abs_mean": 0.21877508983016014, "eval_signal/advantage_pre_scale_std": 0.23022845014929771, "eval_signal/advantage_std": 0.23022845014929771, "eval_signal/brier_reward/centered_abs_mean": 0.1955815851688385, "eval_signal/brier_reward/group_std_mean": 0.24249068275094032, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019558158703148365, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019558158703148365, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.038177490234375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.045081330463290215, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003817749093286693, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003817749093286693, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3407173827290535, "eval_signal/frontier_coverage_0/group_std_mean": 0.42629872262477875, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00487225828692317, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00487225828692317, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3407173827290535, "eval_signal/frontier_coverage_1/group_std_mean": 0.42629872262477875, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00487225828692317, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00487225828692317, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3407173827290535, "eval_signal/frontier_coverage_10/group_std_mean": 0.42629872262477875, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00487225828692317, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00487225828692317, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.28800592571496964, "eval_signal/frontier_coverage_15/group_std_mean": 0.3627534434199333, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004118484794162214, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004118484794162214, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.17610583826899529, "eval_signal/frontier_coverage_20/group_std_mean": 0.2276080958545208, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002518313529435545, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002518313529435545, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.10346624068915844, "eval_signal/frontier_coverage_25/group_std_mean": 0.13241487741470337, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014795672905165702, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014795672905165702, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3407173827290535, "eval_signal/frontier_coverage_5/group_std_mean": 0.42629872262477875, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00487225828692317, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00487225828692317, "eval_steps_per_second": 0.194, "step": 300 }, { "calibration/aurc": 0.25253727756623107, "calibration/batch_distribution_entropy": 0.9449117322882312, "calibration/buffer_distribution_entropy": 0.9660246365706477, "calibration/confidence_entropy": 0.4673776739365726, "calibration/coverage@0%": 0.036328125, "calibration/coverage@1%": 0.036328125, "calibration/coverage@10%": 0.28046875, "calibration/coverage@15%": 0.38125, "calibration/coverage@20%": 0.495703125, "calibration/coverage@25%": 0.562109375, "calibration/coverage@30%": 0.616015625, "calibration/coverage@5%": 0.0609375, "calibration/ece": 0.11527308122425592, "calibration/mean_confidence": 0.541623520405874, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 483.8, "completions/max_terminated_length": 483.8, "completions/mean_length": 214.971875, "completions/mean_terminated_length": 214.99241638183594, "completions/min_length": 87.4, "completions/min_terminated_length": 108.6, "epoch": 0.976, "grad_norm": 0.0008463452104479074, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 1030074176.0, "reward": 0.9660236597061157, "reward_std": 0.06459108740091324, "rewards/accuracy_reward": 0.55732421875, "rewards/brier_reward": 0.8069370865821839, "rewards/confidence_uniqueness_reward": 0.9547674655914307, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.12958877347409725, "rewards/frontier_coverage_1": 0.12958877347409725, "rewards/frontier_coverage_10": 0.12983475551009177, "rewards/frontier_coverage_15": 0.11786438822746277, "rewards/frontier_coverage_20": 0.08397987484931946, "rewards/frontier_coverage_25": 0.06556350365281105, "rewards/frontier_coverage_5": 0.12958877347409725, "signal/accuracy_reward/centered_abs_mean": 0.088421630859375, "signal/accuracy_reward/group_std_mean": 0.11987629979848861, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442108154296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0442108154296875, "signal/advantage_abs_mean": 0.048282912001013756, "signal/advantage_pre_scale_abs_mean": 0.048282912001013756, "signal/advantage_pre_scale_std": 0.09471045136451721, "signal/advantage_std": 0.09471045136451721, "signal/brier_reward/centered_abs_mean": 0.10018168091773987, "signal/brier_reward/group_std_mean": 0.1312152311205864, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010018168576061725, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010018168576061725, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019286222010850906, "signal/confidence_uniqueness_reward/group_std_mean": 0.024579422920942305, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0019286222057417035, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019286222057417035, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.140859717130661, "signal/frontier_coverage_0/group_std_mean": 0.18376873433589935, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020142939407378434, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020142939407378434, "signal/frontier_coverage_1/centered_abs_mean": 0.140859717130661, "signal/frontier_coverage_1/group_std_mean": 0.18376873433589935, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020142939407378434, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020142939407378434, "signal/frontier_coverage_10/centered_abs_mean": 0.14001604318618774, "signal/frontier_coverage_10/group_std_mean": 0.1826833665370941, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00200222940184176, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00200222940184176, "signal/frontier_coverage_15/centered_abs_mean": 0.1187993735074997, "signal/frontier_coverage_15/group_std_mean": 0.15566462874412537, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016988310497254133, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016988310497254133, "signal/frontier_coverage_20/centered_abs_mean": 0.07476956397294998, "signal/frontier_coverage_20/group_std_mean": 0.0980818435549736, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010692047653719783, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010692047653719783, "signal/frontier_coverage_25/centered_abs_mean": 0.05499729737639427, "signal/frontier_coverage_25/group_std_mean": 0.07091807499527931, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007864613551646471, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007864613551646471, "signal/frontier_coverage_5/centered_abs_mean": 0.140859717130661, "signal/frontier_coverage_5/group_std_mean": 0.18376873433589935, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020142939407378434, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020142939407378434, "step": 305 }, { "calibration/aurc": 0.34024954900528936, "calibration/batch_distribution_entropy": 0.9299159526632534, "calibration/buffer_distribution_entropy": 0.9659855427044987, "calibration/confidence_entropy": 0.4099067239170922, "calibration/coverage@0%": 0.02421875, "calibration/coverage@1%": 0.02421875, "calibration/coverage@10%": 0.08984375, "calibration/coverage@15%": 0.116015625, "calibration/coverage@20%": 0.159765625, "calibration/coverage@25%": 0.3703125, "calibration/coverage@30%": 0.4921875, "calibration/coverage@5%": 0.046484375, "calibration/ece": 0.14085424638925065, "calibration/mean_confidence": 0.48945767108986615, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 580.6, "completions/max_terminated_length": 580.6, "completions/mean_length": 209.4171875, "completions/mean_terminated_length": 209.4171875, "completions/min_length": 102.4, "completions/min_terminated_length": 102.4, "epoch": 0.992, "grad_norm": 0.0008461447432637215, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1047347088.0, "reward": 0.952314579486847, "reward_std": 0.057999057322740556, "rewards/accuracy_reward": 0.5294921875, "rewards/brier_reward": 0.8023276925086975, "rewards/confidence_uniqueness_reward": 0.942218017578125, "rewards/format_reward": 1.0, "rewards/frontier_coverage_0": 0.15457661747932433, "rewards/frontier_coverage_1": 0.15457661747932433, "rewards/frontier_coverage_10": 0.15457661747932433, "rewards/frontier_coverage_15": 0.13298805058002472, "rewards/frontier_coverage_20": 0.09164022654294968, "rewards/frontier_coverage_25": 0.07411976456642151, "rewards/frontier_coverage_5": 0.15457661747932433, "signal/accuracy_reward/centered_abs_mean": 0.07926025390625, "signal/accuracy_reward/group_std_mean": 0.10321827828884125, "signal/accuracy_reward/group_zero_std_frac": 0.7125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039630126953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039630126953125, "signal/advantage_abs_mean": 0.04445498287677765, "signal/advantage_pre_scale_abs_mean": 0.04445498287677765, "signal/advantage_pre_scale_std": 0.08984951674938202, "signal/advantage_std": 0.08984951674938202, "signal/brier_reward/centered_abs_mean": 0.10390263050794601, "signal/brier_reward/group_std_mean": 0.13310863077640533, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010390263237059116, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010390263237059116, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02657153606414795, "signal/confidence_uniqueness_reward/group_std_mean": 0.03434660360217094, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002657153643667698, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002657153643667698, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_0/centered_abs_mean": 0.13931848406791686, "signal/frontier_coverage_0/group_std_mean": 0.1796002447605133, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00199225430842489, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00199225430842489, "signal/frontier_coverage_1/centered_abs_mean": 0.13931848406791686, "signal/frontier_coverage_1/group_std_mean": 0.1796002447605133, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00199225430842489, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00199225430842489, "signal/frontier_coverage_10/centered_abs_mean": 0.13931848406791686, "signal/frontier_coverage_10/group_std_mean": 0.1796002447605133, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00199225430842489, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00199225430842489, "signal/frontier_coverage_15/centered_abs_mean": 0.11711540371179581, "signal/frontier_coverage_15/group_std_mean": 0.1513279214501381, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016747502610087394, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016747502610087394, "signal/frontier_coverage_20/centered_abs_mean": 0.07272039651870728, "signal/frontier_coverage_20/group_std_mean": 0.0945111259818077, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010399016202427447, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010399016202427447, "signal/frontier_coverage_25/centered_abs_mean": 0.055749702453613284, "signal/frontier_coverage_25/group_std_mean": 0.0714589461684227, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007972207386046648, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007972207386046648, "signal/frontier_coverage_5/centered_abs_mean": 0.13931848406791686, "signal/frontier_coverage_5/group_std_mean": 0.1796002447605133, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00199225430842489, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00199225430842489, "step": 310 }, { "calibration/aurc": 0.27388473601115615, "calibration/batch_distribution_entropy": 0.8467881507069492, "calibration/buffer_distribution_entropy": 0.9655108154257023, "calibration/confidence_entropy": 0.38870360482432476, "calibration/coverage@0%": 0.0400390625, "calibration/coverage@1%": 0.0400390625, "calibration/coverage@10%": 0.072265625, "calibration/coverage@15%": 0.1328125, "calibration/coverage@20%": 0.2412109375, "calibration/coverage@25%": 0.4775390625, "calibration/coverage@30%": 0.6171875, "calibration/coverage@5%": 0.05078125, "calibration/ece": 0.17844140625000002, "calibration/mean_confidence": 0.6485000000000001, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.5, "completions/max_terminated_length": 401.5, "completions/mean_length": 205.3455810546875, "completions/mean_terminated_length": 205.3455810546875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.9984, "num_tokens": 1054193615.0, "reward": 0.9565387666225433, "reward_std": 0.06219491548836231, "rewards/accuracy_reward": 0.55615234375, "rewards/brier_reward": 0.7606430053710938, "rewards/confidence_uniqueness_reward": 0.9483394622802734, "rewards/format_reward": 1.0, "rewards/frontier_coverage_0": 0.0879761092364788, "rewards/frontier_coverage_1": 0.0879761092364788, "rewards/frontier_coverage_10": 0.08799909055233002, "rewards/frontier_coverage_15": 0.07236327230930328, "rewards/frontier_coverage_20": 0.054976701736450195, "rewards/frontier_coverage_25": 0.049710165709257126, "rewards/frontier_coverage_5": 0.0879761092364788, "signal/accuracy_reward/centered_abs_mean": 0.080535888671875, "signal/accuracy_reward/group_std_mean": 0.10539381578564644, "signal/accuracy_reward/group_zero_std_frac": 0.703125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0402679443359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0402679443359375, "signal/advantage_abs_mean": 0.04706815257668495, "signal/advantage_pre_scale_abs_mean": 0.04706815257668495, "signal/advantage_pre_scale_std": 0.09463966637849808, "signal/advantage_std": 0.09463966637849808, "signal/brier_reward/centered_abs_mean": 0.11788154020905495, "signal/brier_reward/group_std_mean": 0.14912863820791245, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011788154020905495, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011788154020905495, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02400451898574829, "signal/confidence_uniqueness_reward/group_std_mean": 0.029955977573990822, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024004519800655544, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024004519800655544, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_0/centered_abs_mean": 0.13757885247468948, "signal/frontier_coverage_0/group_std_mean": 0.17696572095155716, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001967377553228289, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001967377553228289, "signal/frontier_coverage_1/centered_abs_mean": 0.13757885247468948, "signal/frontier_coverage_1/group_std_mean": 0.17696572095155716, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001967377553228289, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001967377553228289, "signal/frontier_coverage_10/centered_abs_mean": 0.13729550689458847, "signal/frontier_coverage_10/group_std_mean": 0.1765838861465454, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019633257179521024, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019633257179521024, "signal/frontier_coverage_15/centered_abs_mean": 0.11528988182544708, "signal/frontier_coverage_15/group_std_mean": 0.14871473610401154, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016486452659592032, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016486452659592032, "signal/frontier_coverage_20/centered_abs_mean": 0.07046542316675186, "signal/frontier_coverage_20/group_std_mean": 0.09160730615258217, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010076555190607905, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010076555190607905, "signal/frontier_coverage_25/centered_abs_mean": 0.05335330776870251, "signal/frontier_coverage_25/group_std_mean": 0.06834409385919571, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007629523170180619, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007629523170180619, "signal/frontier_coverage_5/centered_abs_mean": 0.13757885247468948, "signal/frontier_coverage_5/group_std_mean": 0.17696572095155716, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001967377553228289, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001967377553228289, "step": 312, "total_flos": 0.0, "train_loss": 7.492675094675715e-05, "train_runtime": 59844.158, "train_samples_per_second": 0.334, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1054193615, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }