Files
RLCR-v4-ks-adaptive-floor05…/trainer_state.json
ModelHub XC dc5b070ee3 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-adaptive-floor05-hotpot
Source: Original Platform
2026-04-11 04:12:07 +08:00

11452 lines
706 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"adaptive_ema/accuracy_reward": 0.23182790830371097,
"adaptive_ema/brier_reward": 0.38261595560645834,
"adaptive_ema/format_reward": 0.6731302296015625,
"adaptive_ema/frontier_aurc_reward": 0.3120027589018625,
"adaptive_ema/frontier_coverage_1": 0.3120027589018625,
"adaptive_ema/frontier_coverage_10": 0.3120027589018625,
"adaptive_ema/frontier_coverage_15": 0.3120027589018625,
"adaptive_ema/frontier_coverage_20": 0.3120027589018625,
"adaptive_ema/frontier_coverage_25": 0.3120027589018625,
"adaptive_ema/frontier_coverage_5": 0.3120027589018625,
"adaptive_ema/frontier_ece_reward": 0.3120027589018625,
"adaptive_ema/frontier_entropy_batch_reward": -0.44369452121519914,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.08982320576906204,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.014333849586546422,
"adaptive_weight/frontier_coverage_1": 0.014333849586546422,
"adaptive_weight/frontier_coverage_10": 0.014333849586546422,
"adaptive_weight/frontier_coverage_15": 0.014333849586546422,
"adaptive_weight/frontier_coverage_20": 0.014333849586546422,
"adaptive_weight/frontier_coverage_25": 0.014333849586546422,
"adaptive_weight/frontier_coverage_5": 0.014333849586546422,
"adaptive_weight/frontier_ece_reward": 0.1000967189669609,
"adaptive_weight/frontier_entropy_batch_reward": 0.21004312336444855,
"calibration/aurc": 0.6448292157088171,
"calibration/batch_distribution_entropy": 0.6427565667944656,
"calibration/batch_entropy_100bins": 0.4781411640735785,
"calibration/batch_entropy_10bins": 0.6427565667944656,
"calibration/batch_entropy_50bins": 0.5589868489931903,
"calibration/batch_uniqueness": 0.7176582774738784,
"calibration/confidence_entropy": 0.34260182991373694,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5034219005334309,
"calibration/mean_confidence": 0.7952695676480407,
"calibration/prompt_uniqueness": 0.5897596315268132,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03369140625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1512.8,
"completions/mean_length": 268.30361328125,
"completions/mean_terminated_length": 224.09957275390624,
"completions/min_length": 1.8,
"completions/min_terminated_length": 1.8,
"epoch": 0.016,
"grad_norm": 0.079594187438488,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0614,
"num_tokens": 17591461.0,
"reward": 0.4612198233604431,
"reward_std": 0.38416741490364076,
"rewards/accuracy_reward": 0.21767578125,
"rewards/brier_reward": 0.37440124750137327,
"rewards/format_reward": 0.68125,
"rewards/frontier_aurc_reward": 0.3006488770246506,
"rewards/frontier_coverage_1": 0.3006488770246506,
"rewards/frontier_coverage_10": 0.3006488770246506,
"rewards/frontier_coverage_15": 0.3006488770246506,
"rewards/frontier_coverage_20": 0.3006488770246506,
"rewards/frontier_coverage_25": 0.3006488770246506,
"rewards/frontier_coverage_5": 0.3006488770246506,
"rewards/frontier_ece_reward": 0.3006488770246506,
"rewards/frontier_entropy_batch_reward": -0.45452544689178465,
"signal/accuracy_reward/centered_abs_mean": 0.238604736328125,
"signal/accuracy_reward/group_bin_occupancy": 0.21171875,
"signal/accuracy_reward/group_std_mean": 0.28302725255489347,
"signal/accuracy_reward/group_zero_std_frac": 0.30625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1193023681640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1193023681640625,
"signal/advantage_abs_mean": 0.32754938006401063,
"signal/advantage_pre_scale_abs_mean": 0.32754938006401063,
"signal/advantage_pre_scale_std": 0.39201714396476744,
"signal/advantage_std": 0.39201714396476744,
"signal/brier_reward/centered_abs_mean": 0.318459290266037,
"signal/brier_reward/group_bin_occupancy": 0.746484375,
"signal/brier_reward/group_std_mean": 0.36360487937927244,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028604864701628686,
"signal/brier_reward/weight": 0.08982320576906204,
"signal/brier_reward/weighted_centered_abs_mean": 0.028604864701628686,
"signal/format_reward/centered_abs_mean": 0.40308837890625,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.45328914523124697,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.201544189453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.201544189453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2910795986652374,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6625,
"signal/frontier_aurc_reward/group_std_mean": 0.3423191785812378,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_aurc_reward/weight": 0.014333849586546422,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_1/centered_abs_mean": 0.2910795986652374,
"signal/frontier_coverage_1/group_bin_occupancy": 0.6625,
"signal/frontier_coverage_1/group_std_mean": 0.3423191785812378,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_1/weight": 0.014333849586546422,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_10/centered_abs_mean": 0.2910795986652374,
"signal/frontier_coverage_10/group_bin_occupancy": 0.6625,
"signal/frontier_coverage_10/group_std_mean": 0.3423191785812378,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_10/weight": 0.014333849586546422,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_15/centered_abs_mean": 0.2910795986652374,
"signal/frontier_coverage_15/group_bin_occupancy": 0.6625,
"signal/frontier_coverage_15/group_std_mean": 0.3423191785812378,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_15/weight": 0.014333849586546422,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_20/centered_abs_mean": 0.2910795986652374,
"signal/frontier_coverage_20/group_bin_occupancy": 0.6625,
"signal/frontier_coverage_20/group_std_mean": 0.3423191785812378,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_20/weight": 0.014333849586546422,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_25/centered_abs_mean": 0.2910795986652374,
"signal/frontier_coverage_25/group_bin_occupancy": 0.6625,
"signal/frontier_coverage_25/group_std_mean": 0.3423191785812378,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_25/weight": 0.014333849586546422,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_5/centered_abs_mean": 0.2910795986652374,
"signal/frontier_coverage_5/group_bin_occupancy": 0.6625,
"signal/frontier_coverage_5/group_std_mean": 0.3423191785812378,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_coverage_5/weight": 0.014333849586546422,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041722475551068785,
"signal/frontier_ece_reward/centered_abs_mean": 0.2910795986652374,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6625,
"signal/frontier_ece_reward/group_std_mean": 0.3423191785812378,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02913580760359764,
"signal/frontier_ece_reward/weight": 0.1000967189669609,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02913580760359764,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4769509553909302,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.434375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5222809553146363,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10018042176961899,
"signal/frontier_entropy_batch_reward/weight": 0.21004312336444855,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.10018042176961899,
"step": 5
},
{
"adaptive_ema/accuracy_reward": 0.23042025751920087,
"adaptive_ema/brier_reward": 0.3819111222332293,
"adaptive_ema/format_reward": 0.6741255697854202,
"adaptive_ema/frontier_aurc_reward": 0.31098182470547453,
"adaptive_ema/frontier_coverage_1": 0.31098182470547453,
"adaptive_ema/frontier_coverage_10": 0.31098182470547453,
"adaptive_ema/frontier_coverage_15": 0.31098182470547453,
"adaptive_ema/frontier_coverage_20": 0.31098182470547453,
"adaptive_ema/frontier_coverage_25": 0.31098182470547453,
"adaptive_ema/frontier_coverage_5": 0.31098182470547453,
"adaptive_ema/frontier_ece_reward": 0.31098182470547453,
"adaptive_ema/frontier_entropy_batch_reward": -0.44455297453697823,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.08983151316642761,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.014340075105428696,
"adaptive_weight/frontier_coverage_1": 0.014340075105428696,
"adaptive_weight/frontier_coverage_10": 0.014340075105428696,
"adaptive_weight/frontier_coverage_15": 0.014340075105428696,
"adaptive_weight/frontier_coverage_20": 0.014340075105428696,
"adaptive_weight/frontier_coverage_25": 0.014340075105428696,
"adaptive_weight/frontier_coverage_5": 0.014340075105428696,
"adaptive_weight/frontier_ece_reward": 0.10014019310474395,
"adaptive_weight/frontier_entropy_batch_reward": 0.2099477618932724,
"calibration/aurc": 0.6860147125059216,
"calibration/batch_distribution_entropy": 0.6500051016919194,
"calibration/batch_entropy_100bins": 0.4773603444617879,
"calibration/batch_entropy_10bins": 0.6500051016919194,
"calibration/batch_entropy_50bins": 0.5612185893380846,
"calibration/batch_uniqueness": 0.7166415776631634,
"calibration/confidence_entropy": 0.34568163418670883,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5364662682352269,
"calibration/mean_confidence": 0.7955750590240586,
"calibration/prompt_uniqueness": 0.609550450226551,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1496.0,
"completions/mean_length": 265.6697265625,
"completions/mean_terminated_length": 214.0361083984375,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.03242058679461479,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0646,
"num_tokens": 35412271.0,
"reward": 0.4505932092666626,
"reward_std": 0.35865501761436464,
"rewards/accuracy_reward": 0.20458984375,
"rewards/brier_reward": 0.37263244986534116,
"rewards/format_reward": 0.7111328125,
"rewards/frontier_aurc_reward": 0.2930680811405182,
"rewards/frontier_coverage_1": 0.2930680811405182,
"rewards/frontier_coverage_10": 0.2930680811405182,
"rewards/frontier_coverage_15": 0.2930680811405182,
"rewards/frontier_coverage_20": 0.2930680811405182,
"rewards/frontier_coverage_25": 0.2930680811405182,
"rewards/frontier_coverage_5": 0.2930680811405182,
"rewards/frontier_ece_reward": 0.2930680811405182,
"rewards/frontier_entropy_batch_reward": -0.4739628076553345,
"signal/accuracy_reward/centered_abs_mean": 0.218255615234375,
"signal/accuracy_reward/group_bin_occupancy": 0.209765625,
"signal/accuracy_reward/group_std_mean": 0.2656914174556732,
"signal/accuracy_reward/group_zero_std_frac": 0.321875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1091278076171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1091278076171875,
"signal/advantage_abs_mean": 0.3003044664859772,
"signal/advantage_pre_scale_abs_mean": 0.3003044664859772,
"signal/advantage_pre_scale_std": 0.3666377246379852,
"signal/advantage_std": 0.3666377246379852,
"signal/brier_reward/centered_abs_mean": 0.3034407377243042,
"signal/brier_reward/group_bin_occupancy": 0.7671875,
"signal/brier_reward/group_std_mean": 0.35142738819122316,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027258511632680893,
"signal/brier_reward/weight": 0.08983151316642761,
"signal/brier_reward/weighted_centered_abs_mean": 0.027258511632680893,
"signal/format_reward/centered_abs_mean": 0.37913818359375,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.439016717672348,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.189569091796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.189569091796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2739542663097382,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.666796875,
"signal/frontier_aurc_reward/group_std_mean": 0.3285772979259491,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_aurc_reward/weight": 0.014340075105428696,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_1/centered_abs_mean": 0.2739542663097382,
"signal/frontier_coverage_1/group_bin_occupancy": 0.666796875,
"signal/frontier_coverage_1/group_std_mean": 0.3285772979259491,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_1/weight": 0.014340075105428696,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_10/centered_abs_mean": 0.2739542663097382,
"signal/frontier_coverage_10/group_bin_occupancy": 0.666796875,
"signal/frontier_coverage_10/group_std_mean": 0.3285772979259491,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_10/weight": 0.014340075105428696,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_15/centered_abs_mean": 0.2739542663097382,
"signal/frontier_coverage_15/group_bin_occupancy": 0.666796875,
"signal/frontier_coverage_15/group_std_mean": 0.3285772979259491,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_15/weight": 0.014340075105428696,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_20/centered_abs_mean": 0.2739542663097382,
"signal/frontier_coverage_20/group_bin_occupancy": 0.666796875,
"signal/frontier_coverage_20/group_std_mean": 0.3285772979259491,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_20/weight": 0.014340075105428696,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_25/centered_abs_mean": 0.2739542663097382,
"signal/frontier_coverage_25/group_bin_occupancy": 0.666796875,
"signal/frontier_coverage_25/group_std_mean": 0.3285772979259491,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_25/weight": 0.014340075105428696,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_5/centered_abs_mean": 0.2739542663097382,
"signal/frontier_coverage_5/group_bin_occupancy": 0.666796875,
"signal/frontier_coverage_5/group_std_mean": 0.3285772979259491,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_coverage_5/weight": 0.014340075105428696,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003928511589765549,
"signal/frontier_ece_reward/centered_abs_mean": 0.2739542663097382,
"signal/frontier_ece_reward/group_bin_occupancy": 0.666796875,
"signal/frontier_ece_reward/group_std_mean": 0.3285772979259491,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.027433741465210916,
"signal/frontier_ece_reward/weight": 0.10014019310474395,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.027433741465210916,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4785237729549408,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.441796875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5272870898246765,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10046486407518387,
"signal/frontier_entropy_batch_reward/weight": 0.2099477618932724,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.10046486407518387,
"step": 10
},
{
"adaptive_ema/accuracy_reward": 0.2303851249087534,
"adaptive_ema/brier_reward": 0.384100495225017,
"adaptive_ema/format_reward": 0.6799472793515441,
"adaptive_ema/frontier_aurc_reward": 0.31133432126378163,
"adaptive_ema/frontier_coverage_1": 0.31152009604459147,
"adaptive_ema/frontier_coverage_10": 0.31152009604459147,
"adaptive_ema/frontier_coverage_15": 0.31152009604459147,
"adaptive_ema/frontier_coverage_20": 0.31152009604459147,
"adaptive_ema/frontier_coverage_25": 0.31152009604459147,
"adaptive_ema/frontier_coverage_5": 0.31152009604459147,
"adaptive_ema/frontier_ece_reward": 0.31121861117666105,
"adaptive_ema/frontier_entropy_batch_reward": -0.44775436091818743,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.08950651288032532,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.014331639185547829,
"adaptive_weight/frontier_coverage_1": 0.01432777550071478,
"adaptive_weight/frontier_coverage_10": 0.01432777550071478,
"adaptive_weight/frontier_coverage_15": 0.01432777550071478,
"adaptive_weight/frontier_coverage_20": 0.01432777550071478,
"adaptive_weight/frontier_coverage_25": 0.01432777550071478,
"adaptive_weight/frontier_coverage_5": 0.01432777550071478,
"adaptive_weight/frontier_ece_reward": 0.10009808540344238,
"adaptive_weight/frontier_entropy_batch_reward": 0.21039710342884063,
"calibration/aurc": 0.6049186984773813,
"calibration/batch_distribution_entropy": 0.6647650329327803,
"calibration/batch_entropy_100bins": 0.486494845315269,
"calibration/batch_entropy_10bins": 0.6647650329327803,
"calibration/batch_entropy_50bins": 0.5687127520068225,
"calibration/batch_uniqueness": 0.7120603469709288,
"calibration/buffer_distribution_entropy": 0.6659962612629504,
"calibration/buffer_entropy_100bins": 0.49174012175065746,
"calibration/buffer_entropy_10bins": 0.6659962612629504,
"calibration/buffer_entropy_50bins": 0.5750999921475161,
"calibration/confidence_entropy": 0.3532576119604358,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4695792380430694,
"calibration/mean_confidence": 0.7954697931195405,
"calibration/prompt_uniqueness": 0.623697074648137,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.019921875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1450.8,
"completions/mean_length": 212.9087890625,
"completions/mean_terminated_length": 186.10792541503906,
"completions/min_length": 11.8,
"completions/min_terminated_length": 11.8,
"epoch": 0.048,
"grad_norm": 0.012215187773108482,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0459,
"num_tokens": 52641193.0,
"reward": 0.5511215567588806,
"reward_std": 0.3094262957572937,
"rewards/accuracy_reward": 0.25849609375,
"rewards/brier_reward": 0.4815207779407501,
"rewards/format_reward": 0.86337890625,
"rewards/frontier_aurc_reward": 0.292304290086031,
"rewards/frontier_coverage_1": 0.3108817681670189,
"rewards/frontier_coverage_10": 0.3108817681670189,
"rewards/frontier_coverage_15": 0.3108817681670189,
"rewards/frontier_coverage_20": 0.3108817681670189,
"rewards/frontier_coverage_25": 0.3108817681670189,
"rewards/frontier_coverage_5": 0.3108817681670189,
"rewards/frontier_ece_reward": 0.2807332813739777,
"rewards/frontier_entropy_batch_reward": -0.5326027274131775,
"signal/accuracy_reward/centered_abs_mean": 0.201080322265625,
"signal/accuracy_reward/group_bin_occupancy": 0.205078125,
"signal/accuracy_reward/group_std_mean": 0.24860720038414003,
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1005401611328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1005401611328125,
"signal/advantage_abs_mean": 0.2522279918193817,
"signal/advantage_pre_scale_abs_mean": 0.2522279918193817,
"signal/advantage_pre_scale_std": 0.3164039790630341,
"signal/advantage_std": 0.3164039790630341,
"signal/brier_reward/centered_abs_mean": 0.28035444021224976,
"signal/brier_reward/group_bin_occupancy": 0.812109375,
"signal/brier_reward/group_std_mean": 0.3334766149520874,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025098316371440887,
"signal/brier_reward/weight": 0.08950651288032532,
"signal/brier_reward/weighted_centered_abs_mean": 0.025098316371440887,
"signal/format_reward/centered_abs_mean": 0.218707275390625,
"signal/format_reward/group_bin_occupancy": 0.244921875,
"signal/format_reward/group_std_mean": 0.31805049777030947,
"signal/format_reward/group_zero_std_frac": 0.040625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1093536376953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1093536376953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.22175134532153606,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7234375,
"signal/frontier_aurc_reward/group_std_mean": 0.26504507940262556,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003176678024465218,
"signal/frontier_aurc_reward/weight": 0.014331639185547829,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003176678024465218,
"signal/frontier_coverage_1/centered_abs_mean": 0.24584266245365144,
"signal/frontier_coverage_1/group_bin_occupancy": 0.716015625,
"signal/frontier_coverage_1/group_std_mean": 0.3008487790822983,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_1/weight": 0.01432777550071478,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_10/centered_abs_mean": 0.24584266245365144,
"signal/frontier_coverage_10/group_bin_occupancy": 0.716015625,
"signal/frontier_coverage_10/group_std_mean": 0.3008487790822983,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_10/weight": 0.01432777550071478,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_15/centered_abs_mean": 0.24584266245365144,
"signal/frontier_coverage_15/group_bin_occupancy": 0.716015625,
"signal/frontier_coverage_15/group_std_mean": 0.3008487790822983,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_15/weight": 0.01432777550071478,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_20/centered_abs_mean": 0.24584266245365144,
"signal/frontier_coverage_20/group_bin_occupancy": 0.716015625,
"signal/frontier_coverage_20/group_std_mean": 0.3008487790822983,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_20/weight": 0.01432777550071478,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_25/centered_abs_mean": 0.24584266245365144,
"signal/frontier_coverage_25/group_bin_occupancy": 0.716015625,
"signal/frontier_coverage_25/group_std_mean": 0.3008487790822983,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_25/weight": 0.01432777550071478,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_5/centered_abs_mean": 0.24584266245365144,
"signal/frontier_coverage_5/group_bin_occupancy": 0.716015625,
"signal/frontier_coverage_5/group_std_mean": 0.3008487790822983,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_coverage_5/weight": 0.01432777550071478,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003522046422585845,
"signal/frontier_ece_reward/centered_abs_mean": 0.24515217244625093,
"signal/frontier_ece_reward/group_bin_occupancy": 0.716796875,
"signal/frontier_ece_reward/group_std_mean": 0.294309064745903,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024531540647149085,
"signal/frontier_ece_reward/weight": 0.10009808540344238,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024531540647149085,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4544231414794922,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5183358907699585,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.09560682773590087,
"signal/frontier_entropy_batch_reward/weight": 0.21039710342884063,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.09560682773590087,
"step": 15
},
{
"adaptive_ema/accuracy_reward": 0.23426954152107396,
"adaptive_ema/brier_reward": 0.39307066305031424,
"adaptive_ema/format_reward": 0.6926847671353432,
"adaptive_ema/frontier_aurc_reward": 0.300348126568556,
"adaptive_ema/frontier_coverage_1": 0.30403770600747176,
"adaptive_ema/frontier_coverage_10": 0.30403770600747176,
"adaptive_ema/frontier_coverage_15": 0.30403770600747176,
"adaptive_ema/frontier_coverage_20": 0.30403770600747176,
"adaptive_ema/frontier_coverage_25": 0.30403770600747176,
"adaptive_ema/frontier_coverage_5": 0.30403770600747176,
"adaptive_ema/frontier_ece_reward": 0.29847388797844887,
"adaptive_ema/frontier_entropy_batch_reward": -0.45138858030431095,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.08781057894229889,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.014495300687849521,
"adaptive_weight/frontier_coverage_1": 0.014418897405266761,
"adaptive_weight/frontier_coverage_10": 0.014418897405266761,
"adaptive_weight/frontier_coverage_15": 0.014418897405266761,
"adaptive_weight/frontier_coverage_20": 0.014418897405266761,
"adaptive_weight/frontier_coverage_25": 0.014418897405266761,
"adaptive_weight/frontier_coverage_5": 0.014418897405266761,
"adaptive_weight/frontier_ece_reward": 0.10149525552988052,
"adaptive_weight/frontier_entropy_batch_reward": 0.20998547673225404,
"calibration/aurc": 0.5263760901597899,
"calibration/batch_distribution_entropy": 0.7777311383989622,
"calibration/batch_entropy_100bins": 0.5323331927952004,
"calibration/batch_entropy_10bins": 0.7777311383989622,
"calibration/batch_entropy_50bins": 0.6220234792840525,
"calibration/batch_uniqueness": 0.7772333424506105,
"calibration/buffer_distribution_entropy": 0.6780116671270415,
"calibration/buffer_entropy_100bins": 0.4986571778459387,
"calibration/buffer_entropy_10bins": 0.6780116671270415,
"calibration/buffer_entropy_50bins": 0.5828922877202816,
"calibration/confidence_entropy": 0.4182842284514491,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.30854319958051096,
"calibration/mean_confidence": 0.7003253907676882,
"calibration/prompt_uniqueness": 0.6911273436300276,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.005078125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1414.8,
"completions/mean_length": 154.23994140625,
"completions/mean_terminated_length": 147.2019500732422,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"epoch": 0.064,
"grad_norm": 0.02356830984354019,
"learning_rate": 1e-06,
"loss": 0.0112,
"num_tokens": 69139010.0,
"reward": 0.6089733481407166,
"reward_std": 0.22602881789207457,
"rewards/accuracy_reward": 0.33115234375,
"rewards/brier_reward": 0.6209439873695374,
"rewards/format_reward": 0.97041015625,
"rewards/frontier_aurc_reward": -0.006396393664181233,
"rewards/frontier_coverage_1": 0.09709094911813736,
"rewards/frontier_coverage_10": 0.09709094911813736,
"rewards/frontier_coverage_15": 0.09709094911813736,
"rewards/frontier_coverage_20": 0.09709094911813736,
"rewards/frontier_coverage_25": 0.09709094911813736,
"rewards/frontier_coverage_5": 0.09709094911813736,
"rewards/frontier_ece_reward": -0.046480930084362625,
"rewards/frontier_entropy_batch_reward": -0.4765442192554474,
"signal/accuracy_reward/centered_abs_mean": 0.205462646484375,
"signal/accuracy_reward/group_bin_occupancy": 0.208203125,
"signal/accuracy_reward/group_std_mean": 0.25680303275585176,
"signal/accuracy_reward/group_zero_std_frac": 0.334375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1027313232421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1027313232421875,
"signal/advantage_abs_mean": 0.17884572744369506,
"signal/advantage_pre_scale_abs_mean": 0.17884572744369506,
"signal/advantage_pre_scale_std": 0.2316117614507675,
"signal/advantage_std": 0.2316117614507675,
"signal/brier_reward/centered_abs_mean": 0.23778945803642274,
"signal/brier_reward/group_bin_occupancy": 0.852734375,
"signal/brier_reward/group_std_mean": 0.29429731965065004,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020891552045941352,
"signal/brier_reward/weight": 0.08781057894229889,
"signal/brier_reward/weighted_centered_abs_mean": 0.020891552045941352,
"signal/format_reward/centered_abs_mean": 0.054486083984375,
"signal/format_reward/group_bin_occupancy": 0.196484375,
"signal/format_reward/group_std_mean": 0.1227356806397438,
"signal/format_reward/group_zero_std_frac": 0.428125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0272430419921875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0272430419921875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00433549745939672,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72109375,
"signal/frontier_aurc_reward/group_std_mean": 0.006195350922644139,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.27727509709075e-05,
"signal/frontier_aurc_reward/weight": 0.014495300687849521,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.27727509709075e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14411845207214355,
"signal/frontier_coverage_1/group_bin_occupancy": 0.744140625,
"signal/frontier_coverage_1/group_std_mean": 0.20870547592639924,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_1/weight": 0.014418897405266761,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_10/centered_abs_mean": 0.14411845207214355,
"signal/frontier_coverage_10/group_bin_occupancy": 0.744140625,
"signal/frontier_coverage_10/group_std_mean": 0.20870547592639924,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_10/weight": 0.014418897405266761,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_15/centered_abs_mean": 0.14411845207214355,
"signal/frontier_coverage_15/group_bin_occupancy": 0.744140625,
"signal/frontier_coverage_15/group_std_mean": 0.20870547592639924,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_15/weight": 0.014418897405266761,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_20/centered_abs_mean": 0.14411845207214355,
"signal/frontier_coverage_20/group_bin_occupancy": 0.744140625,
"signal/frontier_coverage_20/group_std_mean": 0.20870547592639924,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_20/weight": 0.014418897405266761,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_25/centered_abs_mean": 0.14411845207214355,
"signal/frontier_coverage_25/group_bin_occupancy": 0.744140625,
"signal/frontier_coverage_25/group_std_mean": 0.20870547592639924,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_25/weight": 0.014418897405266761,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_5/centered_abs_mean": 0.14411845207214355,
"signal/frontier_coverage_5/group_bin_occupancy": 0.744140625,
"signal/frontier_coverage_5/group_std_mean": 0.20870547592639924,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_coverage_5/weight": 0.014418897405266761,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002079009125009179,
"signal/frontier_ece_reward/centered_abs_mean": 0.12881973385810852,
"signal/frontier_ece_reward/group_bin_occupancy": 0.745703125,
"signal/frontier_ece_reward/group_std_mean": 0.15685342252254486,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013068030402064324,
"signal/frontier_ece_reward/weight": 0.10149525552988052,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013068030402064324,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4035856008529663,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.641796875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.48287264108657835,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0847533330321312,
"signal/frontier_entropy_batch_reward/weight": 0.20998547673225404,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0847533330321312,
"step": 20
},
{
"adaptive_ema/accuracy_reward": 0.23955027289038525,
"adaptive_ema/brier_reward": 0.4080646915669138,
"adaptive_ema/format_reward": 0.707112215851047,
"adaptive_ema/frontier_aurc_reward": 0.28535526680946016,
"adaptive_ema/frontier_coverage_1": 0.29610780835781203,
"adaptive_ema/frontier_coverage_10": 0.29610780835781203,
"adaptive_ema/frontier_coverage_15": 0.29610780835781203,
"adaptive_ema/frontier_coverage_20": 0.29610780835781203,
"adaptive_ema/frontier_coverage_25": 0.29610780835781203,
"adaptive_ema/frontier_coverage_5": 0.29610780835781203,
"adaptive_ema/frontier_ece_reward": 0.28265504878692915,
"adaptive_ema/frontier_entropy_batch_reward": -0.4445421524261969,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.08556736111640931,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.014793501235544682,
"adaptive_weight/frontier_coverage_1": 0.01457088440656662,
"adaptive_weight/frontier_coverage_10": 0.01457088440656662,
"adaptive_weight/frontier_coverage_15": 0.01457088440656662,
"adaptive_weight/frontier_coverage_20": 0.01457088440656662,
"adaptive_weight/frontier_coverage_25": 0.01457088440656662,
"adaptive_weight/frontier_coverage_5": 0.01457088440656662,
"adaptive_weight/frontier_ece_reward": 0.10369691401720046,
"adaptive_weight/frontier_entropy_batch_reward": 0.20881690979003906,
"calibration/aurc": 0.6171283887858363,
"calibration/batch_distribution_entropy": 0.8674204051435778,
"calibration/batch_entropy_100bins": 0.5776314716132508,
"calibration/batch_entropy_10bins": 0.8674204051435778,
"calibration/batch_entropy_50bins": 0.6761122944661062,
"calibration/batch_uniqueness": 0.8069641110167801,
"calibration/buffer_distribution_entropy": 0.7525090521356054,
"calibration/buffer_entropy_100bins": 0.5323303585319001,
"calibration/buffer_entropy_10bins": 0.7525090521356054,
"calibration/buffer_entropy_50bins": 0.622013500945575,
"calibration/confidence_entropy": 0.503978766149048,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.2100240513244424,
"calibration/mean_confidence": 0.47824112817970243,
"calibration/prompt_uniqueness": 0.7171275138018345,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00146484375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 901.6,
"completions/mean_length": 130.5357421875,
"completions/mean_terminated_length": 128.4743423461914,
"completions/min_length": 21.8,
"completions/min_terminated_length": 21.8,
"epoch": 0.08,
"grad_norm": 0.02094282954931259,
"learning_rate": 1e-06,
"loss": 0.0017,
"num_tokens": 85408848.0,
"reward": 0.6991242527961731,
"reward_std": 0.17349291741847991,
"rewards/accuracy_reward": 0.34111328125,
"rewards/brier_reward": 0.7374354720115661,
"rewards/format_reward": 0.9921875,
"rewards/frontier_aurc_reward": -0.005245448090136051,
"rewards/frontier_coverage_1": 0.17646725475788116,
"rewards/frontier_coverage_10": 0.17646725475788116,
"rewards/frontier_coverage_15": 0.17646725475788116,
"rewards/frontier_coverage_20": 0.17646725475788116,
"rewards/frontier_coverage_25": 0.17646725475788116,
"rewards/frontier_coverage_5": 0.17646725475788116,
"rewards/frontier_ece_reward": -0.01268238362390548,
"rewards/frontier_entropy_batch_reward": -0.21498993635177613,
"signal/accuracy_reward/centered_abs_mean": 0.190802001953125,
"signal/accuracy_reward/group_bin_occupancy": 0.204296875,
"signal/accuracy_reward/group_std_mean": 0.24003153443336486,
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0954010009765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0954010009765625,
"signal/advantage_abs_mean": 0.13710006028413774,
"signal/advantage_pre_scale_abs_mean": 0.13710006028413774,
"signal/advantage_pre_scale_std": 0.1836077630519867,
"signal/advantage_std": 0.1836077630519867,
"signal/brier_reward/centered_abs_mean": 0.18633546233177184,
"signal/brier_reward/group_bin_occupancy": 0.838671875,
"signal/brier_reward/group_std_mean": 0.24008690118789672,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015948853641748428,
"signal/brier_reward/weight": 0.08556736111640931,
"signal/brier_reward/weighted_centered_abs_mean": 0.015948853641748428,
"signal/format_reward/centered_abs_mean": 0.01419677734375,
"signal/format_reward/group_bin_occupancy": 0.147265625,
"signal/format_reward/group_std_mean": 0.034811738133430484,
"signal/format_reward/group_zero_std_frac": 0.821875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007098388671875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007098388671875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022644552402198313,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.683984375,
"signal/frontier_aurc_reward/group_std_mean": 0.003705321438610554,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3452448406023905e-05,
"signal/frontier_aurc_reward/weight": 0.014793501235544682,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3452448406023905e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2472827762365341,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_1/group_std_mean": 0.3209026396274567,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_1/weight": 0.01457088440656662,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_10/centered_abs_mean": 0.2472827762365341,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_10/group_std_mean": 0.3209026396274567,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_10/weight": 0.01457088440656662,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_15/centered_abs_mean": 0.2472827762365341,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_15/group_std_mean": 0.3209026396274567,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_15/weight": 0.01457088440656662,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_20/centered_abs_mean": 0.2472827762365341,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_20/group_std_mean": 0.3209026396274567,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_20/weight": 0.01457088440656662,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_25/centered_abs_mean": 0.2472827762365341,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_25/group_std_mean": 0.3209026396274567,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_25/weight": 0.01457088440656662,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_5/centered_abs_mean": 0.2472827762365341,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_5/group_std_mean": 0.3209026396274567,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_coverage_5/weight": 0.01457088440656662,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003604399133473635,
"signal/frontier_ece_reward/centered_abs_mean": 0.08847985863685608,
"signal/frontier_ece_reward/group_bin_occupancy": 0.775,
"signal/frontier_ece_reward/group_std_mean": 0.11089985370635987,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009167110547423363,
"signal/frontier_ece_reward/weight": 0.10369691401720046,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009167110547423363,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3149270832538605,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.592578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3907872557640076,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06577974408864976,
"signal/frontier_entropy_batch_reward/weight": 0.20881690979003906,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.06577974408864976,
"step": 25
},
{
"adaptive_ema/accuracy_reward": 0.24428599930218375,
"adaptive_ema/brier_reward": 0.42536614088638436,
"adaptive_ema/format_reward": 0.7211933273616375,
"adaptive_ema/frontier_aurc_reward": 0.27112399060683245,
"adaptive_ema/frontier_coverage_1": 0.2919919365600858,
"adaptive_ema/frontier_coverage_10": 0.2919919365600858,
"adaptive_ema/frontier_coverage_15": 0.2919919365600858,
"adaptive_ema/frontier_coverage_20": 0.2919919365600858,
"adaptive_ema/frontier_coverage_25": 0.2919919365600858,
"adaptive_ema/frontier_coverage_5": 0.2919919365600858,
"adaptive_ema/frontier_ece_reward": 0.2685948928242548,
"adaptive_ema/frontier_entropy_batch_reward": -0.43403338201474606,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.08326314240694047,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.015123799070715904,
"adaptive_weight/frontier_coverage_1": 0.014690774492919445,
"adaptive_weight/frontier_coverage_10": 0.014690774492919445,
"adaptive_weight/frontier_coverage_15": 0.014690774492919445,
"adaptive_weight/frontier_coverage_20": 0.014690774492919445,
"adaptive_weight/frontier_coverage_25": 0.014690774492919445,
"adaptive_weight/frontier_coverage_5": 0.014690774492919445,
"adaptive_weight/frontier_ece_reward": 0.10597957968711853,
"adaptive_weight/frontier_entropy_batch_reward": 0.20778882503509521,
"calibration/aurc": 0.6043924761417212,
"calibration/batch_distribution_entropy": 0.8436481433817444,
"calibration/batch_entropy_100bins": 0.6037587687908283,
"calibration/batch_entropy_10bins": 0.8436481433817444,
"calibration/batch_entropy_50bins": 0.6966506318827843,
"calibration/batch_uniqueness": 0.827812804990921,
"calibration/buffer_distribution_entropy": 0.8374224803093785,
"calibration/buffer_entropy_100bins": 0.5783989963531001,
"calibration/buffer_entropy_10bins": 0.8374224803093785,
"calibration/buffer_entropy_50bins": 0.6750286362079907,
"calibration/confidence_entropy": 0.4849612253994723,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.005893160413771808,
"calibration/coverage@20%": 0.008649860727033207,
"calibration/coverage@25%": 0.012989111220129853,
"calibration/coverage@30%": 0.016926119094145604,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1533266415432468,
"calibration/mean_confidence": 0.3279998444013065,
"calibration/prompt_uniqueness": 0.7420654654801133,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 695.4,
"completions/mean_length": 132.56982421875,
"completions/mean_terminated_length": 131.06031799316406,
"completions/min_length": 36.8,
"completions/min_terminated_length": 36.8,
"epoch": 0.096,
"grad_norm": 0.0061478931456804276,
"learning_rate": 1e-06,
"loss": 0.0027,
"num_tokens": 101810971.0,
"reward": 0.6923112273216248,
"reward_std": 0.15285933017730713,
"rewards/accuracy_reward": 0.3416015625,
"rewards/brier_reward": 0.7662726521492005,
"rewards/format_reward": 0.99560546875,
"rewards/frontier_aurc_reward": -0.004837475996464491,
"rewards/frontier_coverage_1": 0.2223384290933609,
"rewards/frontier_coverage_10": 0.2223384290933609,
"rewards/frontier_coverage_15": 0.2223384290933609,
"rewards/frontier_coverage_20": 0.2223384290933609,
"rewards/frontier_coverage_25": 0.2223384290933609,
"rewards/frontier_coverage_5": 0.2223384290933609,
"rewards/frontier_ece_reward": 6.405212916433811e-05,
"rewards/frontier_entropy_batch_reward": -0.288457790017128,
"signal/accuracy_reward/centered_abs_mean": 0.18267822265625,
"signal/accuracy_reward/group_bin_occupancy": 0.203515625,
"signal/accuracy_reward/group_std_mean": 0.23303787410259247,
"signal/accuracy_reward/group_zero_std_frac": 0.371875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.091339111328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.091339111328125,
"signal/advantage_abs_mean": 0.12113873660564423,
"signal/advantage_pre_scale_abs_mean": 0.12113873660564423,
"signal/advantage_pre_scale_std": 0.16447269320487976,
"signal/advantage_std": 0.16447269320487976,
"signal/brier_reward/centered_abs_mean": 0.17132785618305207,
"signal/brier_reward/group_bin_occupancy": 0.8296875,
"signal/brier_reward/group_std_mean": 0.22054792940616608,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01426246203482151,
"signal/brier_reward/weight": 0.08326314240694047,
"signal/brier_reward/weighted_centered_abs_mean": 0.01426246203482151,
"signal/format_reward/centered_abs_mean": 0.008465576171875,
"signal/format_reward/group_bin_occupancy": 0.141015625,
"signal/format_reward/group_std_mean": 0.02351398840546608,
"signal/format_reward/group_zero_std_frac": 0.871875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0042327880859375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0042327880859375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015173472464084625,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.683203125,
"signal/frontier_aurc_reward/group_std_mean": 0.002604399994015694,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.293462930538226e-05,
"signal/frontier_aurc_reward/weight": 0.015123799070715904,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.293462930538226e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2993292808532715,
"signal/frontier_coverage_1/group_bin_occupancy": 0.9125,
"signal/frontier_coverage_1/group_std_mean": 0.3749782383441925,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_1/weight": 0.014690774492919445,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_10/centered_abs_mean": 0.2993292808532715,
"signal/frontier_coverage_10/group_bin_occupancy": 0.9125,
"signal/frontier_coverage_10/group_std_mean": 0.3749782383441925,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_10/weight": 0.014690774492919445,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_15/centered_abs_mean": 0.2993292808532715,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9125,
"signal/frontier_coverage_15/group_std_mean": 0.3749782383441925,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_15/weight": 0.014690774492919445,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_20/centered_abs_mean": 0.2993292808532715,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9125,
"signal/frontier_coverage_20/group_std_mean": 0.3749782383441925,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_20/weight": 0.014690774492919445,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_25/centered_abs_mean": 0.2993292808532715,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9125,
"signal/frontier_coverage_25/group_std_mean": 0.3749782383441925,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_25/weight": 0.014690774492919445,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_5/centered_abs_mean": 0.2993292808532715,
"signal/frontier_coverage_5/group_bin_occupancy": 0.9125,
"signal/frontier_coverage_5/group_std_mean": 0.3749782383441925,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_coverage_5/weight": 0.014690774492919445,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0043978951405733826,
"signal/frontier_ece_reward/centered_abs_mean": 0.053389621526002885,
"signal/frontier_ece_reward/group_bin_occupancy": 0.63359375,
"signal/frontier_ece_reward/group_std_mean": 0.07536848038434982,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0056538769975304605,
"signal/frontier_ece_reward/weight": 0.10597957968711853,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0056538769975304605,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35766748189926145,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.645703125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4165478765964508,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.07431503981351853,
"signal/frontier_entropy_batch_reward/weight": 0.20778882503509521,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.07431503981351853,
"step": 30
},
{
"adaptive_ema/accuracy_reward": 0.24984681717517115,
"adaptive_ema/brier_reward": 0.4419331664070724,
"adaptive_ema/format_reward": 0.7346662711486284,
"adaptive_ema/frontier_aurc_reward": 0.2576118949172257,
"adaptive_ema/frontier_coverage_1": 0.2884707104423859,
"adaptive_ema/frontier_coverage_10": 0.2884707104423859,
"adaptive_ema/frontier_coverage_15": 0.2884707104423859,
"adaptive_ema/frontier_coverage_20": 0.2884707104423859,
"adaptive_ema/frontier_coverage_25": 0.2884707104423859,
"adaptive_ema/frontier_coverage_5": 0.2884707104423859,
"adaptive_ema/frontier_ece_reward": 0.2556223252489979,
"adaptive_ema/frontier_entropy_batch_reward": -0.4271526013679908,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.080991829931736,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.015428835526108742,
"adaptive_weight/frontier_coverage_1": 0.014787467941641808,
"adaptive_weight/frontier_coverage_10": 0.014787467941641808,
"adaptive_weight/frontier_coverage_15": 0.014787467941641808,
"adaptive_weight/frontier_coverage_20": 0.014787467941641808,
"adaptive_weight/frontier_coverage_25": 0.014787467941641808,
"adaptive_weight/frontier_coverage_5": 0.014787467941641808,
"adaptive_weight/frontier_ece_reward": 0.10803199261426925,
"adaptive_weight/frontier_entropy_batch_reward": 0.20712252855300903,
"calibration/aurc": 0.47325860188954866,
"calibration/batch_distribution_entropy": 0.861042317416165,
"calibration/batch_entropy_100bins": 0.6235645062258767,
"calibration/batch_entropy_10bins": 0.861042317416165,
"calibration/batch_entropy_50bins": 0.7179239308446073,
"calibration/batch_uniqueness": 0.8427199272494017,
"calibration/buffer_distribution_entropy": 0.8889694652509454,
"calibration/buffer_entropy_100bins": 0.6126072202975525,
"calibration/buffer_entropy_10bins": 0.8889694652509454,
"calibration/buffer_entropy_50bins": 0.7119972466901847,
"calibration/confidence_entropy": 0.48597315663886775,
"calibration/coverage@0%": 0.002734375,
"calibration/coverage@1%": 0.002734375,
"calibration/coverage@10%": 0.002734375,
"calibration/coverage@15%": 0.002734375,
"calibration/coverage@20%": 0.0046875,
"calibration/coverage@25%": 0.034375,
"calibration/coverage@30%": 0.058203125,
"calibration/coverage@5%": 0.002734375,
"calibration/ece": 0.14211889568254338,
"calibration/mean_confidence": 0.34548752300222507,
"calibration/prompt_uniqueness": 0.7481377735594432,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1094.4,
"completions/max_terminated_length": 845.4,
"completions/mean_length": 139.4576171875,
"completions/mean_terminated_length": 138.77519836425782,
"completions/min_length": 26.8,
"completions/min_terminated_length": 26.8,
"epoch": 0.112,
"grad_norm": 0.003297725459560752,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 118348489.0,
"reward": 0.7215077996253967,
"reward_std": 0.13930552899837495,
"rewards/accuracy_reward": 0.383984375,
"rewards/brier_reward": 0.761021614074707,
"rewards/format_reward": 0.9966796875,
"rewards/frontier_aurc_reward": -0.004353551845997572,
"rewards/frontier_coverage_1": 0.2021395444869995,
"rewards/frontier_coverage_10": 0.2021395444869995,
"rewards/frontier_coverage_15": 0.2021395444869995,
"rewards/frontier_coverage_20": 0.2021395444869995,
"rewards/frontier_coverage_25": 0.2021395444869995,
"rewards/frontier_coverage_5": 0.2021395444869995,
"rewards/frontier_ece_reward": 0.007823611074127258,
"rewards/frontier_entropy_batch_reward": -0.23856934309005737,
"signal/accuracy_reward/centered_abs_mean": 0.18026123046875,
"signal/accuracy_reward/group_bin_occupancy": 0.201171875,
"signal/accuracy_reward/group_std_mean": 0.22736807763576508,
"signal/accuracy_reward/group_zero_std_frac": 0.390625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.090130615234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.090130615234375,
"signal/advantage_abs_mean": 0.11153837889432908,
"signal/advantage_pre_scale_abs_mean": 0.11153837889432908,
"signal/advantage_pre_scale_std": 0.1530650556087494,
"signal/advantage_std": 0.1530650556087494,
"signal/brier_reward/centered_abs_mean": 0.17593652307987212,
"signal/brier_reward/group_bin_occupancy": 0.832421875,
"signal/brier_reward/group_std_mean": 0.22441621124744415,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014248938858509063,
"signal/brier_reward/weight": 0.080991829931736,
"signal/brier_reward/weighted_centered_abs_mean": 0.014248938858509063,
"signal/format_reward/centered_abs_mean": 0.0049560546875,
"signal/format_reward/group_bin_occupancy": 0.13203125,
"signal/format_reward/group_std_mean": 0.011194882122799754,
"signal/format_reward/group_zero_std_frac": 0.94375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00247802734375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00247802734375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001741332351230085,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.715234375,
"signal/frontier_aurc_reward/group_std_mean": 0.0029535184148699045,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6885440820478835e-05,
"signal/frontier_aurc_reward/weight": 0.015428835526108742,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6885440820478835e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.30704258680343627,
"signal/frontier_coverage_1/group_bin_occupancy": 0.90625,
"signal/frontier_coverage_1/group_std_mean": 0.3823031187057495,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_1/weight": 0.014787467941641808,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_10/centered_abs_mean": 0.30704258680343627,
"signal/frontier_coverage_10/group_bin_occupancy": 0.90625,
"signal/frontier_coverage_10/group_std_mean": 0.3823031187057495,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_10/weight": 0.014787467941641808,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_15/centered_abs_mean": 0.30704258680343627,
"signal/frontier_coverage_15/group_bin_occupancy": 0.90625,
"signal/frontier_coverage_15/group_std_mean": 0.3823031187057495,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_15/weight": 0.014787467941641808,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_20/centered_abs_mean": 0.30704258680343627,
"signal/frontier_coverage_20/group_bin_occupancy": 0.90625,
"signal/frontier_coverage_20/group_std_mean": 0.3823031187057495,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_20/weight": 0.014787467941641808,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_25/centered_abs_mean": 0.30704258680343627,
"signal/frontier_coverage_25/group_bin_occupancy": 0.90625,
"signal/frontier_coverage_25/group_std_mean": 0.3823031187057495,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_25/weight": 0.014787467941641808,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_5/centered_abs_mean": 0.30704258680343627,
"signal/frontier_coverage_5/group_bin_occupancy": 0.90625,
"signal/frontier_coverage_5/group_std_mean": 0.3823031187057495,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_coverage_5/weight": 0.014787467941641808,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00453977920114994,
"signal/frontier_ece_reward/centered_abs_mean": 0.04829949140548706,
"signal/frontier_ece_reward/group_bin_occupancy": 0.6453125,
"signal/frontier_ece_reward/group_std_mean": 0.06934612393379211,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005220386572182179,
"signal/frontier_ece_reward/weight": 0.10803199261426925,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005220386572182179,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29004823267459867,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.658984375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3490027576684952,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06009818613529205,
"signal/frontier_entropy_batch_reward/weight": 0.20712252855300903,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.06009818613529205,
"step": 35
},
{
"adaptive_ema/accuracy_reward": 0.25757596134861205,
"adaptive_ema/brier_reward": 0.4572738365559509,
"adaptive_ema/format_reward": 0.7475636501642173,
"adaptive_ema/frontier_aurc_reward": 0.2447767752529431,
"adaptive_ema/frontier_coverage_1": 0.2823296902337432,
"adaptive_ema/frontier_coverage_10": 0.2823296902337432,
"adaptive_ema/frontier_coverage_15": 0.2823296902337432,
"adaptive_ema/frontier_coverage_20": 0.2823296902337432,
"adaptive_ema/frontier_coverage_25": 0.2823296902337432,
"adaptive_ema/frontier_coverage_5": 0.2823296902337432,
"adaptive_ema/frontier_ece_reward": 0.24354919909770872,
"adaptive_ema/frontier_entropy_batch_reward": -0.4113240754585915,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.07904001474380493,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.015750362537801266,
"adaptive_weight/frontier_coverage_1": 0.014967151544988155,
"adaptive_weight/frontier_coverage_10": 0.014967151544988155,
"adaptive_weight/frontier_coverage_15": 0.014967151544988155,
"adaptive_weight/frontier_coverage_20": 0.014967151544988155,
"adaptive_weight/frontier_coverage_25": 0.014967151544988155,
"adaptive_weight/frontier_coverage_5": 0.014967151544988155,
"adaptive_weight/frontier_ece_reward": 0.11016732156276703,
"adaptive_weight/frontier_entropy_batch_reward": 0.20553938448429107,
"calibration/aurc": 0.4955706475359262,
"calibration/batch_distribution_entropy": 0.9012512573648932,
"calibration/batch_entropy_100bins": 0.6357494389080675,
"calibration/batch_entropy_10bins": 0.9012512573648932,
"calibration/batch_entropy_50bins": 0.7350456436079122,
"calibration/batch_uniqueness": 0.858147530472006,
"calibration/buffer_distribution_entropy": 0.9099325834432286,
"calibration/buffer_entropy_100bins": 0.6295079060520901,
"calibration/buffer_entropy_10bins": 0.9099325834432286,
"calibration/buffer_entropy_50bins": 0.7297726062982012,
"calibration/confidence_entropy": 0.5300262596907609,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.010176125244618394,
"calibration/coverage@15%": 0.010176125244618394,
"calibration/coverage@20%": 0.018395303326810174,
"calibration/coverage@25%": 0.041487279843444226,
"calibration/coverage@30%": 0.05870841487279843,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.13868593730009668,
"calibration/mean_confidence": 0.44378032542648127,
"calibration/prompt_uniqueness": 0.7742377528290844,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1163.2,
"completions/max_terminated_length": 522.4,
"completions/mean_length": 147.51142578125,
"completions/mean_terminated_length": 146.96887817382813,
"completions/min_length": 45.2,
"completions/min_terminated_length": 45.2,
"epoch": 0.128,
"grad_norm": 0.0016533477464690804,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 134775678.0,
"reward": 0.7621492624282837,
"reward_std": 0.1226007416844368,
"rewards/accuracy_reward": 0.400390625,
"rewards/brier_reward": 0.7549683094024658,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_aurc_reward": -0.004399614129215479,
"rewards/frontier_coverage_1": 0.15388748794794083,
"rewards/frontier_coverage_10": 0.15388748794794083,
"rewards/frontier_coverage_15": 0.15388748794794083,
"rewards/frontier_coverage_20": 0.15388748794794083,
"rewards/frontier_coverage_25": 0.15388748794794083,
"rewards/frontier_coverage_5": 0.15388748794794083,
"rewards/frontier_ece_reward": 0.007644195389002561,
"rewards/frontier_entropy_batch_reward": -0.057697060704231265,
"signal/accuracy_reward/centered_abs_mean": 0.15521240234375,
"signal/accuracy_reward/group_bin_occupancy": 0.19296875,
"signal/accuracy_reward/group_std_mean": 0.19830750823020935,
"signal/accuracy_reward/group_zero_std_frac": 0.45625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.077606201171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.077606201171875,
"signal/advantage_abs_mean": 0.09616784155368804,
"signal/advantage_pre_scale_abs_mean": 0.09616784155368804,
"signal/advantage_pre_scale_std": 0.13922219276428222,
"signal/advantage_std": 0.13922219276428222,
"signal/brier_reward/centered_abs_mean": 0.17729856967926025,
"signal/brier_reward/group_bin_occupancy": 0.86796875,
"signal/brier_reward/group_std_mean": 0.22361719012260436,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014017242379486561,
"signal/brier_reward/weight": 0.07904001474380493,
"signal/brier_reward/weighted_centered_abs_mean": 0.014017242379486561,
"signal/format_reward/centered_abs_mean": 0.00264892578125,
"signal/format_reward/group_bin_occupancy": 0.13046875,
"signal/format_reward/group_std_mean": 0.0077339802403002976,
"signal/format_reward/group_zero_std_frac": 0.95625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002261793240904808,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.735546875,
"signal/frontier_aurc_reward/group_std_mean": 0.0035905469208955764,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5628071054816245e-05,
"signal/frontier_aurc_reward/weight": 0.015750362537801266,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5628071054816245e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.25459115505218505,
"signal/frontier_coverage_1/group_bin_occupancy": 0.90859375,
"signal/frontier_coverage_1/group_std_mean": 0.3230483055114746,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_1/weight": 0.014967151544988155,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_10/centered_abs_mean": 0.25459115505218505,
"signal/frontier_coverage_10/group_bin_occupancy": 0.90859375,
"signal/frontier_coverage_10/group_std_mean": 0.3230483055114746,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_10/weight": 0.014967151544988155,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_15/centered_abs_mean": 0.25459115505218505,
"signal/frontier_coverage_15/group_bin_occupancy": 0.90859375,
"signal/frontier_coverage_15/group_std_mean": 0.3230483055114746,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_15/weight": 0.014967151544988155,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_20/centered_abs_mean": 0.25459115505218505,
"signal/frontier_coverage_20/group_bin_occupancy": 0.90859375,
"signal/frontier_coverage_20/group_std_mean": 0.3230483055114746,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_20/weight": 0.014967151544988155,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_25/centered_abs_mean": 0.25459115505218505,
"signal/frontier_coverage_25/group_bin_occupancy": 0.90859375,
"signal/frontier_coverage_25/group_std_mean": 0.3230483055114746,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_25/weight": 0.014967151544988155,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_5/centered_abs_mean": 0.25459115505218505,
"signal/frontier_coverage_5/group_bin_occupancy": 0.90859375,
"signal/frontier_coverage_5/group_std_mean": 0.3230483055114746,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_coverage_5/weight": 0.014967151544988155,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038090549409389497,
"signal/frontier_ece_reward/centered_abs_mean": 0.06426123976707458,
"signal/frontier_ece_reward/group_bin_occupancy": 0.73671875,
"signal/frontier_ece_reward/group_std_mean": 0.08365523815155029,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007080427370965481,
"signal/frontier_ece_reward/weight": 0.11016732156276703,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007080427370965481,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12160103470087051,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.61484375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1600523829460144,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024980336800217627,
"signal/frontier_entropy_batch_reward/weight": 0.20553938448429107,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024980336800217627,
"step": 40
},
{
"adaptive_ema/accuracy_reward": 0.2671619133001659,
"adaptive_ema/brier_reward": 0.4718947123865959,
"adaptive_ema/format_reward": 0.7598753433415034,
"adaptive_ema/frontier_aurc_reward": 0.2325843612368374,
"adaptive_ema/frontier_coverage_1": 0.27418200755577676,
"adaptive_ema/frontier_coverage_10": 0.27418200755577676,
"adaptive_ema/frontier_coverage_15": 0.27418200755577676,
"adaptive_ema/frontier_coverage_20": 0.27418200755577676,
"adaptive_ema/frontier_coverage_25": 0.27418200755577676,
"adaptive_ema/frontier_coverage_5": 0.27418200755577676,
"adaptive_ema/frontier_ece_reward": 0.23242151496023716,
"adaptive_ema/frontier_entropy_batch_reward": -0.39476656485279743,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.07716480493545533,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01605743505060673,
"adaptive_weight/frontier_coverage_1": 0.015187039040029049,
"adaptive_weight/frontier_coverage_10": 0.015187039040029049,
"adaptive_weight/frontier_coverage_15": 0.015187039040029049,
"adaptive_weight/frontier_coverage_20": 0.015187039040029049,
"adaptive_weight/frontier_coverage_25": 0.015187039040029049,
"adaptive_weight/frontier_coverage_5": 0.015187039040029049,
"adaptive_weight/frontier_ece_reward": 0.11215667873620987,
"adaptive_weight/frontier_entropy_batch_reward": 0.20379884541034698,
"calibration/aurc": 0.3362157821524915,
"calibration/batch_distribution_entropy": 0.9216198855891935,
"calibration/batch_entropy_100bins": 0.6318047884404534,
"calibration/batch_entropy_10bins": 0.9216198855891935,
"calibration/batch_entropy_50bins": 0.7343137477556684,
"calibration/batch_uniqueness": 0.8617429476204027,
"calibration/buffer_distribution_entropy": 0.9184048720902066,
"calibration/buffer_entropy_100bins": 0.6374289092099044,
"calibration/buffer_entropy_10bins": 0.9184048720902066,
"calibration/buffer_entropy_50bins": 0.7380459109803228,
"calibration/confidence_entropy": 0.5135767288905108,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.029296875,
"calibration/coverage@15%": 0.1109375,
"calibration/coverage@20%": 0.17307057240704501,
"calibration/coverage@25%": 0.2977655638454012,
"calibration/coverage@30%": 0.4256421232876712,
"calibration/coverage@5%": 0.019140625,
"calibration/ece": 0.14798410352513824,
"calibration/mean_confidence": 0.5187316618199427,
"calibration/prompt_uniqueness": 0.7675874974267695,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1004.4,
"completions/max_terminated_length": 751.0,
"completions/mean_length": 154.86259765625,
"completions/mean_terminated_length": 154.59256896972656,
"completions/min_length": 51.8,
"completions/min_terminated_length": 51.8,
"epoch": 0.144,
"grad_norm": 0.0019156603375449777,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 151311903.0,
"reward": 0.8003306746482849,
"reward_std": 0.12828224003314972,
"rewards/accuracy_reward": 0.49716796875,
"rewards/brier_reward": 0.7543691277503968,
"rewards/format_reward": 0.998828125,
"rewards/frontier_aurc_reward": -0.0035826864186674356,
"rewards/frontier_coverage_1": 0.08718824163079261,
"rewards/frontier_coverage_10": 0.08718824163079261,
"rewards/frontier_coverage_15": 0.08718824163079261,
"rewards/frontier_coverage_20": 0.08718824163079261,
"rewards/frontier_coverage_25": 0.08718824163079261,
"rewards/frontier_coverage_5": 0.08718824163079261,
"rewards/frontier_ece_reward": 0.02351235654205084,
"rewards/frontier_entropy_batch_reward": -0.08158636391162873,
"signal/accuracy_reward/centered_abs_mean": 0.155462646484375,
"signal/accuracy_reward/group_bin_occupancy": 0.199609375,
"signal/accuracy_reward/group_std_mean": 0.2071024954319,
"signal/accuracy_reward/group_zero_std_frac": 0.403125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0777313232421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0777313232421875,
"signal/advantage_abs_mean": 0.0987144574522972,
"signal/advantage_pre_scale_abs_mean": 0.0987144574522972,
"signal/advantage_pre_scale_std": 0.14342830181121827,
"signal/advantage_std": 0.14342830181121827,
"signal/brier_reward/centered_abs_mean": 0.17776857912540436,
"signal/brier_reward/group_bin_occupancy": 0.860546875,
"signal/brier_reward/group_std_mean": 0.2245795577764511,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013717547245323657,
"signal/brier_reward/weight": 0.07716480493545533,
"signal/brier_reward/weighted_centered_abs_mean": 0.013717547245323657,
"signal/format_reward/centered_abs_mean": 0.00223388671875,
"signal/format_reward/group_bin_occupancy": 0.12890625,
"signal/format_reward/group_std_mean": 0.005897296266630292,
"signal/format_reward/group_zero_std_frac": 0.96875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001116943359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001116943359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002920934371650219,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.75,
"signal/frontier_aurc_reward/group_std_mean": 0.004523701500147581,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.691151989391074e-05,
"signal/frontier_aurc_reward/weight": 0.01605743505060673,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.691151989391074e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22277138829231263,
"signal/frontier_coverage_1/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_1/group_std_mean": 0.29276385307312014,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_1/weight": 0.015187039040029049,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_10/centered_abs_mean": 0.22277138829231263,
"signal/frontier_coverage_10/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_10/group_std_mean": 0.29276385307312014,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_10/weight": 0.015187039040029049,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_15/centered_abs_mean": 0.22277138829231263,
"signal/frontier_coverage_15/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_15/group_std_mean": 0.29276385307312014,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_15/weight": 0.015187039040029049,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_20/centered_abs_mean": 0.22277138829231263,
"signal/frontier_coverage_20/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_20/group_std_mean": 0.29276385307312014,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_20/weight": 0.015187039040029049,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_25/centered_abs_mean": 0.22277138829231263,
"signal/frontier_coverage_25/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_25/group_std_mean": 0.29276385307312014,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_25/weight": 0.015187039040029049,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_5/centered_abs_mean": 0.22277138829231263,
"signal/frontier_coverage_5/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_5/group_std_mean": 0.29276385307312014,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_coverage_5/weight": 0.015187039040029049,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033829107880592344,
"signal/frontier_ece_reward/centered_abs_mean": 0.06752243638038635,
"signal/frontier_ece_reward/group_bin_occupancy": 0.763671875,
"signal/frontier_ece_reward/group_std_mean": 0.08624927401542663,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0075724986381828785,
"signal/frontier_ece_reward/weight": 0.11215667873620987,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0075724986381828785,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1393910378217697,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.584765625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.17833027690649034,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028399351984262466,
"signal/frontier_entropy_batch_reward/weight": 0.20379884541034698,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028399351984262466,
"step": 45
},
{
"adaptive_ema/accuracy_reward": 0.27738092581450313,
"adaptive_ema/brier_reward": 0.48569612856662836,
"adaptive_ema/format_reward": 0.7715978268562566,
"adaptive_ema/frontier_aurc_reward": 0.22100064226531516,
"adaptive_ema/frontier_coverage_1": 0.2659692743136882,
"adaptive_ema/frontier_coverage_10": 0.2659692743136882,
"adaptive_ema/frontier_coverage_15": 0.2659692743136882,
"adaptive_ema/frontier_coverage_20": 0.2659692743136882,
"adaptive_ema/frontier_coverage_25": 0.2659692743136882,
"adaptive_ema/frontier_coverage_5": 0.2659692743136882,
"adaptive_ema/frontier_ece_reward": 0.22203780923501104,
"adaptive_ema/frontier_entropy_batch_reward": -0.37876009305917974,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.07538380771875382,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.016350987181067467,
"adaptive_weight/frontier_coverage_1": 0.015407079830765724,
"adaptive_weight/frontier_coverage_10": 0.015407079830765724,
"adaptive_weight/frontier_coverage_15": 0.015407079830765724,
"adaptive_weight/frontier_coverage_20": 0.015407079830765724,
"adaptive_weight/frontier_coverage_25": 0.015407079830765724,
"adaptive_weight/frontier_coverage_5": 0.015407079830765724,
"adaptive_weight/frontier_ece_reward": 0.11403079777956009,
"adaptive_weight/frontier_entropy_batch_reward": 0.20209192335605622,
"calibration/aurc": 0.38348908347214267,
"calibration/batch_distribution_entropy": 0.9456007988080287,
"calibration/batch_entropy_100bins": 0.6593447663249573,
"calibration/batch_entropy_10bins": 0.9456007988080287,
"calibration/batch_entropy_50bins": 0.7599760050408886,
"calibration/batch_uniqueness": 0.8793887918154031,
"calibration/buffer_distribution_entropy": 0.9236976372936251,
"calibration/buffer_entropy_100bins": 0.6426565436461602,
"calibration/buffer_entropy_10bins": 0.9236976372936251,
"calibration/buffer_entropy_50bins": 0.7432587692609751,
"calibration/confidence_entropy": 0.4798428379337942,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.022745098039215685,
"calibration/coverage@20%": 0.07201281068934423,
"calibration/coverage@25%": 0.10175239402267758,
"calibration/coverage@30%": 0.23394594053374776,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.10627890594104014,
"calibration/mean_confidence": 0.49457840209347503,
"calibration/prompt_uniqueness": 0.7750884495317377,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1312.2,
"completions/max_terminated_length": 465.2,
"completions/mean_length": 159.719140625,
"completions/mean_terminated_length": 159.0467041015625,
"completions/min_length": 62.4,
"completions/min_terminated_length": 62.4,
"epoch": 0.16,
"grad_norm": 0.0052760387770831585,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 167968355.0,
"reward": 0.7870096206665039,
"reward_std": 0.12264777570962906,
"rewards/accuracy_reward": 0.451171875,
"rewards/brier_reward": 0.7576644659042359,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.0038807093631476166,
"rewards/frontier_coverage_1": 0.13388580977916717,
"rewards/frontier_coverage_10": 0.13388580977916717,
"rewards/frontier_coverage_15": 0.13388580977916717,
"rewards/frontier_coverage_20": 0.13388580977916717,
"rewards/frontier_coverage_25": 0.13388580977916717,
"rewards/frontier_coverage_5": 0.13388580977916717,
"rewards/frontier_ece_reward": 0.019111651740968228,
"rewards/frontier_entropy_batch_reward": -0.04900626316666603,
"signal/accuracy_reward/centered_abs_mean": 0.15225830078125,
"signal/accuracy_reward/group_bin_occupancy": 0.19375,
"signal/accuracy_reward/group_std_mean": 0.19692128896713257,
"signal/accuracy_reward/group_zero_std_frac": 0.45,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.076129150390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.076129150390625,
"signal/advantage_abs_mean": 0.09508385807275772,
"signal/advantage_pre_scale_abs_mean": 0.09508385807275772,
"signal/advantage_pre_scale_std": 0.14095271229743958,
"signal/advantage_std": 0.14095271229743958,
"signal/brier_reward/centered_abs_mean": 0.1855572283267975,
"signal/brier_reward/group_bin_occupancy": 0.851953125,
"signal/brier_reward/group_std_mean": 0.23422395586967468,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013987931795418263,
"signal/brier_reward/weight": 0.07538380771875382,
"signal/brier_reward/weighted_centered_abs_mean": 0.013987931795418263,
"signal/format_reward/centered_abs_mean": 0.0018798828125,
"signal/format_reward/group_bin_occupancy": 0.128515625,
"signal/format_reward/group_std_mean": 0.005187963135540485,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003281982522457838,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.742578125,
"signal/frontier_aurc_reward/group_std_mean": 0.004983571451157331,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3681767894886434e-05,
"signal/frontier_aurc_reward/weight": 0.016350987181067467,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3681767894886434e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22950102984905243,
"signal/frontier_coverage_1/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_1/group_std_mean": 0.2979970157146454,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_1/weight": 0.015407079830765724,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_10/centered_abs_mean": 0.22950102984905243,
"signal/frontier_coverage_10/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_10/group_std_mean": 0.2979970157146454,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_10/weight": 0.015407079830765724,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_15/centered_abs_mean": 0.22950102984905243,
"signal/frontier_coverage_15/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_15/group_std_mean": 0.2979970157146454,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_15/weight": 0.015407079830765724,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_20/centered_abs_mean": 0.22950102984905243,
"signal/frontier_coverage_20/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_20/group_std_mean": 0.2979970157146454,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_20/weight": 0.015407079830765724,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_25/centered_abs_mean": 0.22950102984905243,
"signal/frontier_coverage_25/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_25/group_std_mean": 0.2979970157146454,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_25/weight": 0.015407079830765724,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_5/centered_abs_mean": 0.22950102984905243,
"signal/frontier_coverage_5/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_5/group_std_mean": 0.2979970157146454,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_coverage_5/weight": 0.015407079830765724,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035361848771572114,
"signal/frontier_ece_reward/centered_abs_mean": 0.06420275643467903,
"signal/frontier_ece_reward/group_bin_occupancy": 0.766015625,
"signal/frontier_ece_reward/group_std_mean": 0.08285662680864334,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007319671288132668,
"signal/frontier_ece_reward/weight": 0.11403079777956009,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007319671288132668,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08867169320583343,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.583984375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.115115886926651,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01792623642832041,
"signal/frontier_entropy_batch_reward/weight": 0.20209192335605622,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01792623642832041,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.5912737570825037,
"eval_calibration/batch_distribution_entropy": 0.8838553804869622,
"eval_calibration/batch_entropy_100bins": 0.5698017244122562,
"eval_calibration/batch_entropy_10bins": 0.8838553804869622,
"eval_calibration/batch_entropy_50bins": 0.6635008310587025,
"eval_calibration/batch_uniqueness": 0.8337689418574401,
"eval_calibration/buffer_distribution_entropy": 0.9266873253076938,
"eval_calibration/buffer_entropy_100bins": 0.6462848756942279,
"eval_calibration/buffer_entropy_10bins": 0.9266873253076938,
"eval_calibration/buffer_entropy_50bins": 0.746800332589743,
"eval_calibration/confidence_entropy": 0.46311622799423224,
"eval_calibration/coverage@0%": 0.0,
"eval_calibration/coverage@1%": 0.0,
"eval_calibration/coverage@10%": 0.0,
"eval_calibration/coverage@15%": 0.0,
"eval_calibration/coverage@20%": 0.0,
"eval_calibration/coverage@25%": 0.0,
"eval_calibration/coverage@30%": 0.0,
"eval_calibration/coverage@5%": 0.0,
"eval_calibration/ece": 0.24023437500000003,
"eval_calibration/mean_confidence": 0.4886668346774193,
"eval_calibration/prompt_uniqueness": 0.8337689418574401,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 661.75,
"eval_completions/max_terminated_length": 361.5,
"eval_completions/mean_length": 165.89580917358398,
"eval_completions/mean_terminated_length": 163.20844650268555,
"eval_completions/min_length": 80.0,
"eval_completions/min_terminated_length": 80.0,
"eval_loss": 0.0,
"eval_num_tokens": 167968355.0,
"eval_reward": 0.704633966088295,
"eval_reward_std": 0.23834892362356186,
"eval_rewards/accuracy_reward": 0.36328125,
"eval_rewards/brier_reward": 0.741399809718132,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.004931134288199246,
"eval_rewards/frontier_coverage_1": 0.18837910518050194,
"eval_rewards/frontier_coverage_10": 0.18837910518050194,
"eval_rewards/frontier_coverage_15": 0.18837910518050194,
"eval_rewards/frontier_coverage_20": 0.18837910518050194,
"eval_rewards/frontier_coverage_25": 0.18837910518050194,
"eval_rewards/frontier_coverage_5": 0.18837910518050194,
"eval_rewards/frontier_ece_reward": 0.005667033372446895,
"eval_rewards/frontier_entropy_batch_reward": -0.24560075998306274,
"eval_runtime": 28.0344,
"eval_samples_per_second": 17.835,
"eval_signal/accuracy_reward/centered_abs_mean": 0.44580078125,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4784899652004242,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.222900390625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.222900390625,
"eval_signal/advantage_abs_mean": 0.20399565622210503,
"eval_signal/advantage_pre_scale_abs_mean": 0.20399565622210503,
"eval_signal/advantage_pre_scale_std": 0.23601685464382172,
"eval_signal/advantage_std": 0.23601685464382172,
"eval_signal/brier_reward/centered_abs_mean": 0.23530442267656326,
"eval_signal/brier_reward/group_bin_occupancy": 0.9375,
"eval_signal/brier_reward/group_std_mean": 0.28814616054296494,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01757534919306636,
"eval_signal/brier_reward/weight": 0.07469195872545242,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01757534919306636,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_bin_occupancy": 0.1328125,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0044213252840563655,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7890625,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006909370771609247,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.282986189238727e-05,
"eval_signal/frontier_aurc_reward/weight": 0.016472404822707176,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.282986189238727e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.34682943671941757,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.44357454776763916,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_1/weight": 0.015480867587029934,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.34682943671941757,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.44357454776763916,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_10/weight": 0.015480867587029934,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.34682943671941757,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.44357454776763916,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_15/weight": 0.015480867587029934,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.34682943671941757,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.44357454776763916,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_20/weight": 0.015480867587029934,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.34682943671941757,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.44357454776763916,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_25/weight": 0.015480867587029934,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.34682943671941757,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.44357454776763916,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_coverage_5/weight": 0.015480867587029934,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0053692207438871264,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.07570397295057774,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9375,
"eval_signal/frontier_ece_reward/group_std_mean": 0.10851926729083061,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008691624971106648,
"eval_signal/frontier_ece_reward/weight": 0.11481068283319473,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008691624971106648,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3352913558483124,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.53125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.39990557730197906,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06754100508987904,
"eval_signal/frontier_entropy_batch_reward/weight": 0.2014397382736206,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.06754100508987904,
"eval_steps_per_second": 0.143,
"step": 50
},
{
"adaptive_ema/accuracy_reward": 0.2851488862848163,
"adaptive_ema/brier_reward": 0.49900272362611686,
"adaptive_ema/format_reward": 0.7827584087449388,
"adaptive_ema/frontier_aurc_reward": 0.2099823909624985,
"adaptive_ema/frontier_coverage_1": 0.2605413524021998,
"adaptive_ema/frontier_coverage_10": 0.2605413524021998,
"adaptive_ema/frontier_coverage_15": 0.2605413524021998,
"adaptive_ema/frontier_coverage_20": 0.2605413524021998,
"adaptive_ema/frontier_coverage_25": 0.2605413524021998,
"adaptive_ema/frontier_coverage_5": 0.2605413524021998,
"adaptive_ema/frontier_ece_reward": 0.21204188899333073,
"adaptive_ema/frontier_entropy_batch_reward": -0.36382088058690953,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.07369283884763718,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01664076782763004,
"adaptive_weight/frontier_coverage_1": 0.015575786866247655,
"adaptive_weight/frontier_coverage_10": 0.015575786866247655,
"adaptive_weight/frontier_coverage_15": 0.015575786866247655,
"adaptive_weight/frontier_coverage_20": 0.015575786866247655,
"adaptive_weight/frontier_coverage_25": 0.015575786866247655,
"adaptive_weight/frontier_coverage_5": 0.015575786866247655,
"adaptive_weight/frontier_ece_reward": 0.11590351611375808,
"adaptive_weight/frontier_entropy_batch_reward": 0.20060815215110778,
"calibration/aurc": 0.42473116107063263,
"calibration/batch_distribution_entropy": 0.9512775063954914,
"calibration/batch_entropy_100bins": 0.664655780835464,
"calibration/batch_entropy_10bins": 0.9512775063954914,
"calibration/batch_entropy_50bins": 0.7677863296635008,
"calibration/batch_uniqueness": 0.8851788600876166,
"calibration/buffer_distribution_entropy": 0.9283644128738313,
"calibration/buffer_entropy_100bins": 0.6481654925489638,
"calibration/buffer_entropy_10bins": 0.9283644128738313,
"calibration/buffer_entropy_50bins": 0.7487002117614024,
"calibration/confidence_entropy": 0.45550758726326446,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.02815563725490196,
"calibration/coverage@25%": 0.03518688725490196,
"calibration/coverage@30%": 0.14534313725490194,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.18896995251225493,
"calibration/mean_confidence": 0.5027503140318628,
"calibration/prompt_uniqueness": 0.7720712270746619,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 899.4,
"completions/max_terminated_length": 475.6,
"completions/mean_length": 166.46630859375,
"completions/mean_terminated_length": 166.06537475585938,
"completions/min_length": 62.8,
"completions/min_terminated_length": 62.8,
"epoch": 0.176,
"grad_norm": 0.016879109665751457,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 184910090.0,
"reward": 0.7733013749122619,
"reward_std": 0.11779149472713471,
"rewards/accuracy_reward": 0.43916015625,
"rewards/brier_reward": 0.7471538543701172,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.003878576587885618,
"rewards/frontier_coverage_1": 0.14781001955270767,
"rewards/frontier_coverage_10": 0.14781001955270767,
"rewards/frontier_coverage_15": 0.14781001955270767,
"rewards/frontier_coverage_20": 0.14781001955270767,
"rewards/frontier_coverage_25": 0.14781001955270767,
"rewards/frontier_coverage_5": 0.14781001955270767,
"rewards/frontier_ece_reward": 0.01585045214742422,
"rewards/frontier_entropy_batch_reward": -0.0840143196284771,
"signal/accuracy_reward/centered_abs_mean": 0.150177001953125,
"signal/accuracy_reward/group_bin_occupancy": 0.192578125,
"signal/accuracy_reward/group_std_mean": 0.194138702750206,
"signal/accuracy_reward/group_zero_std_frac": 0.459375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0750885009765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0750885009765625,
"signal/advantage_abs_mean": 0.09202968776226043,
"signal/advantage_pre_scale_abs_mean": 0.09202968776226043,
"signal/advantage_pre_scale_std": 0.1346214473247528,
"signal/advantage_std": 0.1346214473247528,
"signal/brier_reward/centered_abs_mean": 0.19577476978302003,
"signal/brier_reward/group_bin_occupancy": 0.840625,
"signal/brier_reward/group_std_mean": 0.2449037402868271,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014425278082489968,
"signal/brier_reward/weight": 0.07369283884763718,
"signal/brier_reward/weighted_centered_abs_mean": 0.014425278082489968,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033737838268280028,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72578125,
"signal/frontier_aurc_reward/group_std_mean": 0.005213375855237246,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.614309338852763e-05,
"signal/frontier_aurc_reward/weight": 0.01664076782763004,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.614309338852763e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2555152654647827,
"signal/frontier_coverage_1/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_1/group_std_mean": 0.32543211579322817,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_1/weight": 0.015575786866247655,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_10/centered_abs_mean": 0.2555152654647827,
"signal/frontier_coverage_10/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_10/group_std_mean": 0.32543211579322817,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_10/weight": 0.015575786866247655,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_15/centered_abs_mean": 0.2555152654647827,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_15/group_std_mean": 0.32543211579322817,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_15/weight": 0.015575786866247655,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_20/centered_abs_mean": 0.2555152654647827,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_20/group_std_mean": 0.32543211579322817,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_20/weight": 0.015575786866247655,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_25/centered_abs_mean": 0.2555152654647827,
"signal/frontier_coverage_25/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_25/group_std_mean": 0.32543211579322817,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_25/weight": 0.015575786866247655,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_5/centered_abs_mean": 0.2555152654647827,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_5/group_std_mean": 0.32543211579322817,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_coverage_5/weight": 0.015575786866247655,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003980266209691763,
"signal/frontier_ece_reward/centered_abs_mean": 0.056504715234041214,
"signal/frontier_ece_reward/group_bin_occupancy": 0.734765625,
"signal/frontier_ece_reward/group_std_mean": 0.07384437769651413,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006548455357551575,
"signal/frontier_ece_reward/weight": 0.11590351611375808,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006548455357551575,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13305359482765197,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.537890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.16954652667045594,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026704121381044388,
"signal/frontier_entropy_batch_reward/weight": 0.20060815215110778,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026704121381044388,
"step": 55
},
{
"adaptive_ema/accuracy_reward": 0.2937015969328449,
"adaptive_ema/brier_reward": 0.5114010685111742,
"adaptive_ema/format_reward": 0.7933795053837518,
"adaptive_ema/frontier_aurc_reward": 0.19951788453686373,
"adaptive_ema/frontier_coverage_1": 0.2544931446820287,
"adaptive_ema/frontier_coverage_10": 0.2544931446820287,
"adaptive_ema/frontier_coverage_15": 0.2544931446820287,
"adaptive_ema/frontier_coverage_20": 0.2544931446820287,
"adaptive_ema/frontier_coverage_25": 0.2544931446820287,
"adaptive_ema/frontier_coverage_5": 0.2544931446820287,
"adaptive_ema/frontier_ece_reward": 0.2025474157793294,
"adaptive_ema/frontier_entropy_batch_reward": -0.3494971511591881,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.07209239751100541,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.016913557425141335,
"adaptive_weight/frontier_coverage_1": 0.01575196459889412,
"adaptive_weight/frontier_coverage_10": 0.01575196459889412,
"adaptive_weight/frontier_coverage_15": 0.01575196459889412,
"adaptive_weight/frontier_coverage_20": 0.01575196459889412,
"adaptive_weight/frontier_coverage_25": 0.01575196459889412,
"adaptive_weight/frontier_coverage_5": 0.01575196459889412,
"adaptive_weight/frontier_ece_reward": 0.1176643967628479,
"adaptive_weight/frontier_entropy_batch_reward": 0.19911785721778869,
"calibration/aurc": 0.3647552716722237,
"calibration/batch_distribution_entropy": 0.9517000612178709,
"calibration/batch_entropy_100bins": 0.6801240206424846,
"calibration/batch_entropy_10bins": 0.9517000612178709,
"calibration/batch_entropy_50bins": 0.7834312829799275,
"calibration/batch_uniqueness": 0.889671378153011,
"calibration/buffer_distribution_entropy": 0.9329493751260302,
"calibration/buffer_entropy_100bins": 0.6537943828426254,
"calibration/buffer_entropy_10bins": 0.9329493751260302,
"calibration/buffer_entropy_50bins": 0.7544689893599347,
"calibration/confidence_entropy": 0.46903725087093895,
"calibration/coverage@0%": 0.0050804182974559685,
"calibration/coverage@1%": 0.0050804182974559685,
"calibration/coverage@10%": 0.025002293297455968,
"calibration/coverage@15%": 0.03359604329745597,
"calibration/coverage@20%": 0.07428219789628179,
"calibration/coverage@25%": 0.1606837084148728,
"calibration/coverage@30%": 0.24316368028375734,
"calibration/coverage@5%": 0.0050804182974559685,
"calibration/ece": 0.12325721157197897,
"calibration/mean_confidence": 0.46352088132185665,
"calibration/prompt_uniqueness": 0.7723385369244277,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 781.4,
"completions/max_terminated_length": 574.4,
"completions/mean_length": 174.29404296875,
"completions/mean_terminated_length": 174.160693359375,
"completions/min_length": 60.2,
"completions/min_terminated_length": 60.2,
"epoch": 0.192,
"grad_norm": 0.0022992538288235664,
"learning_rate": 1e-06,
"loss": -0.0005,
"num_tokens": 201509677.0,
"reward": 0.7893312335014343,
"reward_std": 0.10895285159349441,
"rewards/accuracy_reward": 0.4681640625,
"rewards/brier_reward": 0.7553203701972961,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0033560583367943765,
"rewards/frontier_coverage_1": 0.13129240423440933,
"rewards/frontier_coverage_10": 0.13129240423440933,
"rewards/frontier_coverage_15": 0.13129240423440933,
"rewards/frontier_coverage_20": 0.13129240423440933,
"rewards/frontier_coverage_25": 0.13129240423440933,
"rewards/frontier_coverage_5": 0.13129240423440933,
"rewards/frontier_ece_reward": 0.019136627763509752,
"rewards/frontier_entropy_batch_reward": -0.06853316724300385,
"signal/accuracy_reward/centered_abs_mean": 0.1386474609375,
"signal/accuracy_reward/group_bin_occupancy": 0.188671875,
"signal/accuracy_reward/group_std_mean": 0.1808041363954544,
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06932373046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06932373046875,
"signal/advantage_abs_mean": 0.08480708599090576,
"signal/advantage_pre_scale_abs_mean": 0.08480708599090576,
"signal/advantage_pre_scale_std": 0.1272047370672226,
"signal/advantage_std": 0.1272047370672226,
"signal/brier_reward/centered_abs_mean": 0.18749885261058807,
"signal/brier_reward/group_bin_occupancy": 0.840234375,
"signal/brier_reward/group_std_mean": 0.23523322641849517,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01351576652377844,
"signal/brier_reward/weight": 0.07209239751100541,
"signal/brier_reward/weighted_centered_abs_mean": 0.01351576652377844,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_bin_occupancy": 0.127734375,
"signal/format_reward/group_std_mean": 0.003866990143433213,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002764544356614351,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72265625,
"signal/frontier_aurc_reward/group_std_mean": 0.004374626139178872,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.676385360653512e-05,
"signal/frontier_aurc_reward/weight": 0.016913557425141335,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.676385360653512e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2529231429100037,
"signal/frontier_coverage_1/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_1/group_std_mean": 0.3189453959465027,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_1/weight": 0.01575196459889412,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_10/centered_abs_mean": 0.2529231429100037,
"signal/frontier_coverage_10/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_10/group_std_mean": 0.3189453959465027,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_10/weight": 0.01575196459889412,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_15/centered_abs_mean": 0.2529231429100037,
"signal/frontier_coverage_15/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_15/group_std_mean": 0.3189453959465027,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_15/weight": 0.01575196459889412,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_20/centered_abs_mean": 0.2529231429100037,
"signal/frontier_coverage_20/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_20/group_std_mean": 0.3189453959465027,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_20/weight": 0.01575196459889412,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_25/centered_abs_mean": 0.2529231429100037,
"signal/frontier_coverage_25/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_25/group_std_mean": 0.3189453959465027,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_25/weight": 0.01575196459889412,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_5/centered_abs_mean": 0.2529231429100037,
"signal/frontier_coverage_5/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_5/group_std_mean": 0.3189453959465027,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_coverage_5/weight": 0.01575196459889412,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003983773337677121,
"signal/frontier_ece_reward/centered_abs_mean": 0.04883822426199913,
"signal/frontier_ece_reward/group_bin_occupancy": 0.71328125,
"signal/frontier_ece_reward/group_std_mean": 0.0641761988401413,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005746498424559832,
"signal/frontier_ece_reward/weight": 0.1176643967628479,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005746498424559832,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11229871660470962,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.58046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.14136107563972472,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022366367653012274,
"signal/frontier_entropy_batch_reward/weight": 0.19911785721778869,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022366367653012274,
"step": 60
},
{
"adaptive_ema/accuracy_reward": 0.3038428838680613,
"adaptive_ema/brier_reward": 0.5228995794258062,
"adaptive_ema/format_reward": 0.8034687394880666,
"adaptive_ema/frontier_aurc_reward": 0.18958452402128695,
"adaptive_ema/frontier_coverage_1": 0.24643764275075056,
"adaptive_ema/frontier_coverage_10": 0.24643764275075056,
"adaptive_ema/frontier_coverage_15": 0.24643764275075056,
"adaptive_ema/frontier_coverage_20": 0.24643764275075056,
"adaptive_ema/frontier_coverage_25": 0.24643764275075056,
"adaptive_ema/frontier_coverage_5": 0.24643764275075056,
"adaptive_ema/frontier_ece_reward": 0.1935301358220703,
"adaptive_ema/frontier_entropy_batch_reward": -0.334805943644907,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.07057955414056778,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01716814637184143,
"adaptive_weight/frontier_coverage_1": 0.015963743999600412,
"adaptive_weight/frontier_coverage_10": 0.015963743999600412,
"adaptive_weight/frontier_coverage_15": 0.015963743999600412,
"adaptive_weight/frontier_coverage_20": 0.015963743999600412,
"adaptive_weight/frontier_coverage_25": 0.015963743999600412,
"adaptive_weight/frontier_coverage_5": 0.015963743999600412,
"adaptive_weight/frontier_ece_reward": 0.1193055659532547,
"adaptive_weight/frontier_entropy_batch_reward": 0.19746426343917847,
"calibration/aurc": 0.3230782286663958,
"calibration/batch_distribution_entropy": 0.9548661124099714,
"calibration/batch_entropy_100bins": 0.6952415427511257,
"calibration/batch_entropy_10bins": 0.9548661124099714,
"calibration/batch_entropy_50bins": 0.7961299048491316,
"calibration/batch_uniqueness": 0.895257568359375,
"calibration/buffer_distribution_entropy": 0.9375800889991058,
"calibration/buffer_entropy_100bins": 0.6608783801545776,
"calibration/buffer_entropy_10bins": 0.9375800889991058,
"calibration/buffer_entropy_50bins": 0.761598284883173,
"calibration/confidence_entropy": 0.47213845584893355,
"calibration/coverage@0%": 0.008203125,
"calibration/coverage@1%": 0.008203125,
"calibration/coverage@10%": 0.057421875,
"calibration/coverage@15%": 0.143359375,
"calibration/coverage@20%": 0.21484375,
"calibration/coverage@25%": 0.363671875,
"calibration/coverage@30%": 0.51640625,
"calibration/coverage@5%": 0.022265625,
"calibration/ece": 0.17461655873487095,
"calibration/mean_confidence": 0.4572429964962189,
"calibration/prompt_uniqueness": 0.79521484375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 939.4,
"completions/max_terminated_length": 589.4,
"completions/mean_length": 185.18642578125,
"completions/mean_terminated_length": 184.7908447265625,
"completions/min_length": 69.4,
"completions/min_terminated_length": 69.4,
"epoch": 0.208,
"grad_norm": 0.001206784276291728,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 218438210.0,
"reward": 0.8118098497390747,
"reward_std": 0.1081416666507721,
"rewards/accuracy_reward": 0.51357421875,
"rewards/brier_reward": 0.7503050684928894,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0029526965226978064,
"rewards/frontier_coverage_1": 0.080244729667902,
"rewards/frontier_coverage_10": 0.080244729667902,
"rewards/frontier_coverage_15": 0.080244729667902,
"rewards/frontier_coverage_20": 0.080244729667902,
"rewards/frontier_coverage_25": 0.080244729667902,
"rewards/frontier_coverage_5": 0.080244729667902,
"rewards/frontier_ece_reward": 0.02022084631025791,
"rewards/frontier_entropy_batch_reward": -0.039874791353940967,
"signal/accuracy_reward/centered_abs_mean": 0.141192626953125,
"signal/accuracy_reward/group_bin_occupancy": 0.194140625,
"signal/accuracy_reward/group_std_mean": 0.18957480490207673,
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0705963134765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0705963134765625,
"signal/advantage_abs_mean": 0.08337005525827408,
"signal/advantage_pre_scale_abs_mean": 0.08337005525827408,
"signal/advantage_pre_scale_std": 0.12559250891208648,
"signal/advantage_std": 0.12559250891208648,
"signal/brier_reward/centered_abs_mean": 0.18458410501480102,
"signal/brier_reward/group_bin_occupancy": 0.85859375,
"signal/brier_reward/group_std_mean": 0.23162301778793334,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01303065400570631,
"signal/brier_reward/weight": 0.07057955414056778,
"signal/brier_reward/weighted_centered_abs_mean": 0.01303065400570631,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0033145629335194827,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002375226141884923,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7328125,
"signal/frontier_aurc_reward/group_std_mean": 0.0037417122628539802,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.078082347405143e-05,
"signal/frontier_aurc_reward/weight": 0.01716814637184143,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.078082347405143e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.25239089131355286,
"signal/frontier_coverage_1/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_1/group_std_mean": 0.32096874713897705,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_1/weight": 0.015963743999600412,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_10/centered_abs_mean": 0.25239089131355286,
"signal/frontier_coverage_10/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_10/group_std_mean": 0.32096874713897705,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_10/weight": 0.015963743999600412,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_15/centered_abs_mean": 0.25239089131355286,
"signal/frontier_coverage_15/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_15/group_std_mean": 0.32096874713897705,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_15/weight": 0.015963743999600412,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_20/centered_abs_mean": 0.25239089131355286,
"signal/frontier_coverage_20/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_20/group_std_mean": 0.32096874713897705,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_20/weight": 0.015963743999600412,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_25/centered_abs_mean": 0.25239089131355286,
"signal/frontier_coverage_25/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_25/group_std_mean": 0.32096874713897705,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_25/weight": 0.015963743999600412,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_5/centered_abs_mean": 0.25239089131355286,
"signal/frontier_coverage_5/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_5/group_std_mean": 0.32096874713897705,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_coverage_5/weight": 0.015963743999600412,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004028905322775245,
"signal/frontier_ece_reward/centered_abs_mean": 0.04425336569547653,
"signal/frontier_ece_reward/group_bin_occupancy": 0.716796875,
"signal/frontier_ece_reward/group_std_mean": 0.05906342342495918,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005278925970196724,
"signal/frontier_ece_reward/weight": 0.1193055659532547,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005278925970196724,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07931657396256923,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.540625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10367400497198105,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01566738625988364,
"signal/frontier_entropy_batch_reward/weight": 0.19746426343917847,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01566738625988364,
"step": 65
},
{
"adaptive_ema/accuracy_reward": 0.31235736482134985,
"adaptive_ema/brier_reward": 0.5346501426524177,
"adaptive_ema/format_reward": 0.8130536589228408,
"adaptive_ema/frontier_aurc_reward": 0.18014086612295538,
"adaptive_ema/frontier_coverage_1": 0.2403427264163771,
"adaptive_ema/frontier_coverage_10": 0.2403427264163771,
"adaptive_ema/frontier_coverage_15": 0.2403427264163771,
"adaptive_ema/frontier_coverage_20": 0.2403427264163771,
"adaptive_ema/frontier_coverage_25": 0.2403427264163771,
"adaptive_ema/frontier_coverage_5": 0.2403427264163771,
"adaptive_ema/frontier_ece_reward": 0.18498148000477496,
"adaptive_ema/frontier_entropy_batch_reward": -0.32019707006610054,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.06907036155462265,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.017426037043333054,
"adaptive_weight/frontier_coverage_1": 0.016146432608366013,
"adaptive_weight/frontier_coverage_10": 0.016146432608366013,
"adaptive_weight/frontier_coverage_15": 0.016146432608366013,
"adaptive_weight/frontier_coverage_20": 0.016146432608366013,
"adaptive_weight/frontier_coverage_25": 0.016146432608366013,
"adaptive_weight/frontier_coverage_5": 0.016146432608366013,
"adaptive_weight/frontier_ece_reward": 0.12097169011831284,
"adaptive_weight/frontier_entropy_batch_reward": 0.19595331251621245,
"calibration/aurc": 0.3370663750465913,
"calibration/batch_distribution_entropy": 0.9550954799649769,
"calibration/batch_entropy_100bins": 0.7028098750078203,
"calibration/batch_entropy_10bins": 0.9550954799649769,
"calibration/batch_entropy_50bins": 0.8028701619900716,
"calibration/batch_uniqueness": 0.8958972456600977,
"calibration/buffer_distribution_entropy": 0.9420689064142062,
"calibration/buffer_entropy_100bins": 0.6680678396022661,
"calibration/buffer_entropy_10bins": 0.9420689064142062,
"calibration/buffer_entropy_50bins": 0.7688077976399332,
"calibration/confidence_entropy": 0.46388023878603474,
"calibration/coverage@0%": 0.003125,
"calibration/coverage@1%": 0.003125,
"calibration/coverage@10%": 0.03712469362745098,
"calibration/coverage@15%": 0.12622395833333333,
"calibration/coverage@20%": 0.2442907475490196,
"calibration/coverage@25%": 0.32597273284313727,
"calibration/coverage@30%": 0.4839981617647059,
"calibration/coverage@5%": 0.01015625,
"calibration/ece": 0.19436231464460785,
"calibration/mean_confidence": 0.4149125628063725,
"calibration/prompt_uniqueness": 0.785660688735692,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 1340.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 189.396484375,
"completions/mean_terminated_length": 188.08037414550782,
"completions/min_length": 64.2,
"completions/min_terminated_length": 64.2,
"epoch": 0.224,
"grad_norm": 0.001395778963342309,
"learning_rate": 1e-06,
"loss": 0.0025,
"num_tokens": 235530814.0,
"reward": 0.7931761741638184,
"reward_std": 0.10108533054590225,
"rewards/accuracy_reward": 0.46767578125,
"rewards/brier_reward": 0.7646504402160644,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_aurc_reward": -0.003211074694991112,
"rewards/frontier_coverage_1": 0.1359811007976532,
"rewards/frontier_coverage_10": 0.1359811007976532,
"rewards/frontier_coverage_15": 0.1359811007976532,
"rewards/frontier_coverage_20": 0.1359811007976532,
"rewards/frontier_coverage_25": 0.1359811007976532,
"rewards/frontier_coverage_5": 0.1359811007976532,
"rewards/frontier_ece_reward": 0.017966778576374055,
"rewards/frontier_entropy_batch_reward": -0.04219883792102337,
"signal/accuracy_reward/centered_abs_mean": 0.128875732421875,
"signal/accuracy_reward/group_bin_occupancy": 0.1859375,
"signal/accuracy_reward/group_std_mean": 0.1700698047876358,
"signal/accuracy_reward/group_zero_std_frac": 0.5125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0644378662109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0644378662109375,
"signal/advantage_abs_mean": 0.07703877985477448,
"signal/advantage_pre_scale_abs_mean": 0.07703877985477448,
"signal/advantage_pre_scale_std": 0.12077159732580185,
"signal/advantage_std": 0.12077159732580185,
"signal/brier_reward/centered_abs_mean": 0.1796649605035782,
"signal/brier_reward/group_bin_occupancy": 0.8453125,
"signal/brier_reward/group_std_mean": 0.22606565058231354,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012408938072621823,
"signal/brier_reward/weight": 0.06907036155462265,
"signal/brier_reward/weighted_centered_abs_mean": 0.012408938072621823,
"signal/format_reward/centered_abs_mean": 0.00264892578125,
"signal/format_reward/group_bin_occupancy": 0.13046875,
"signal/format_reward/group_std_mean": 0.0077339803334325555,
"signal/format_reward/group_zero_std_frac": 0.95625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024040113668888806,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72734375,
"signal/frontier_aurc_reward/group_std_mean": 0.003842458548024297,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.189369719824754e-05,
"signal/frontier_aurc_reward/weight": 0.017426037043333054,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.189369719824754e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.24840882122516633,
"signal/frontier_coverage_1/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_1/group_std_mean": 0.3130028069019318,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_1/weight": 0.016146432608366013,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_10/centered_abs_mean": 0.24840882122516633,
"signal/frontier_coverage_10/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_10/group_std_mean": 0.3130028069019318,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_10/weight": 0.016146432608366013,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_15/centered_abs_mean": 0.24840882122516633,
"signal/frontier_coverage_15/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_15/group_std_mean": 0.3130028069019318,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_15/weight": 0.016146432608366013,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_20/centered_abs_mean": 0.24840882122516633,
"signal/frontier_coverage_20/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_20/group_std_mean": 0.3130028069019318,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_20/weight": 0.016146432608366013,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_25/centered_abs_mean": 0.24840882122516633,
"signal/frontier_coverage_25/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_25/group_std_mean": 0.3130028069019318,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_25/weight": 0.016146432608366013,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_5/centered_abs_mean": 0.24840882122516633,
"signal/frontier_coverage_5/group_bin_occupancy": 0.893359375,
"signal/frontier_coverage_5/group_std_mean": 0.3130028069019318,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_coverage_5/weight": 0.016146432608366013,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004010894149541855,
"signal/frontier_ece_reward/centered_abs_mean": 0.04076602905988693,
"signal/frontier_ece_reward/group_bin_occupancy": 0.70234375,
"signal/frontier_ece_reward/group_std_mean": 0.054039982706308366,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004931167047470808,
"signal/frontier_ece_reward/weight": 0.12097169011831284,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004931167047470808,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07719949334859848,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.580078125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09887575209140778,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015121790021657944,
"signal/frontier_entropy_batch_reward/weight": 0.19595331251621245,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015121790021657944,
"step": 70
},
{
"adaptive_ema/accuracy_reward": 0.3219364410288985,
"adaptive_ema/brier_reward": 0.5454268242624374,
"adaptive_ema/format_reward": 0.8221584566218649,
"adaptive_ema/frontier_aurc_reward": 0.17115832332112776,
"adaptive_ema/frontier_coverage_1": 0.23319734935747088,
"adaptive_ema/frontier_coverage_10": 0.23319734935747088,
"adaptive_ema/frontier_coverage_15": 0.23319734935747088,
"adaptive_ema/frontier_coverage_20": 0.23319734935747088,
"adaptive_ema/frontier_coverage_25": 0.23319734935747088,
"adaptive_ema/frontier_coverage_5": 0.23319734935747088,
"adaptive_ema/frontier_ece_reward": 0.17682809336381372,
"adaptive_ema/frontier_entropy_batch_reward": -0.30664606653046406,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.06764657944440841,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.017662768438458442,
"adaptive_weight/frontier_coverage_1": 0.0163407064974308,
"adaptive_weight/frontier_coverage_10": 0.0163407064974308,
"adaptive_weight/frontier_coverage_15": 0.0163407064974308,
"adaptive_weight/frontier_coverage_20": 0.0163407064974308,
"adaptive_weight/frontier_coverage_25": 0.0163407064974308,
"adaptive_weight/frontier_coverage_5": 0.0163407064974308,
"adaptive_weight/frontier_ece_reward": 0.1224995955824852,
"adaptive_weight/frontier_entropy_batch_reward": 0.1944468140602112,
"calibration/aurc": 0.3872978113781852,
"calibration/batch_distribution_entropy": 0.9483224085405197,
"calibration/batch_entropy_100bins": 0.6823282298201587,
"calibration/batch_entropy_10bins": 0.9483224085405197,
"calibration/batch_entropy_50bins": 0.7821902097105213,
"calibration/batch_uniqueness": 0.8889759547000595,
"calibration/buffer_distribution_entropy": 0.9458731353518836,
"calibration/buffer_entropy_100bins": 0.6739486859505013,
"calibration/buffer_entropy_10bins": 0.9458731353518836,
"calibration/buffer_entropy_50bins": 0.7745501236562791,
"calibration/confidence_entropy": 0.4855388941567712,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.07734375,
"calibration/coverage@15%": 0.125,
"calibration/coverage@20%": 0.190625,
"calibration/coverage@25%": 0.2046875,
"calibration/coverage@30%": 0.29466911764705883,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.17449257863567286,
"calibration/mean_confidence": 0.4862572071414702,
"calibration/prompt_uniqueness": 0.7823488614561654,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 964.0,
"completions/max_terminated_length": 572.0,
"completions/mean_length": 194.5607421875,
"completions/mean_terminated_length": 194.03685302734374,
"completions/min_length": 80.2,
"completions/min_terminated_length": 80.2,
"epoch": 0.24,
"grad_norm": 0.0011437971843406558,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 252774796.0,
"reward": 0.8140671372413635,
"reward_std": 0.110272516310215,
"rewards/accuracy_reward": 0.52626953125,
"rewards/brier_reward": 0.7511320590972901,
"rewards/format_reward": 0.99892578125,
"rewards/frontier_aurc_reward": -0.0030971964821219443,
"rewards/frontier_coverage_1": 0.07542620496824384,
"rewards/frontier_coverage_10": 0.07542620496824384,
"rewards/frontier_coverage_15": 0.07542620496824384,
"rewards/frontier_coverage_20": 0.07542620496824384,
"rewards/frontier_coverage_25": 0.07542620496824384,
"rewards/frontier_coverage_5": 0.07542620496824384,
"rewards/frontier_ece_reward": 0.019243543781340123,
"rewards/frontier_entropy_batch_reward": -0.04741813093423843,
"signal/accuracy_reward/centered_abs_mean": 0.147760009765625,
"signal/accuracy_reward/group_bin_occupancy": 0.194140625,
"signal/accuracy_reward/group_std_mean": 0.19448045492172242,
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0738800048828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0738800048828125,
"signal/advantage_abs_mean": 0.08491412997245788,
"signal/advantage_pre_scale_abs_mean": 0.08491412997245788,
"signal/advantage_pre_scale_std": 0.13074929565191268,
"signal/advantage_std": 0.13074929565191268,
"signal/brier_reward/centered_abs_mean": 0.18371776044368743,
"signal/brier_reward/group_bin_occupancy": 0.861328125,
"signal/brier_reward/group_std_mean": 0.2315950334072113,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0124272545799613,
"signal/brier_reward/weight": 0.06764657944440841,
"signal/brier_reward/weighted_centered_abs_mean": 0.0124272545799613,
"signal/format_reward/centered_abs_mean": 0.002081298828125,
"signal/format_reward/group_bin_occupancy": 0.129296875,
"signal/format_reward/group_std_mean": 0.006076698703691363,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002888940554112196,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.732421875,
"signal/frontier_aurc_reward/group_std_mean": 0.004471804574131965,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.103433359181509e-05,
"signal/frontier_aurc_reward/weight": 0.017662768438458442,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.103433359181509e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2310771405696869,
"signal/frontier_coverage_1/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_1/group_std_mean": 0.29875036478042605,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_1/weight": 0.0163407064974308,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_10/centered_abs_mean": 0.2310771405696869,
"signal/frontier_coverage_10/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_10/group_std_mean": 0.29875036478042605,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_10/weight": 0.0163407064974308,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_15/centered_abs_mean": 0.2310771405696869,
"signal/frontier_coverage_15/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_15/group_std_mean": 0.29875036478042605,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_15/weight": 0.0163407064974308,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_20/centered_abs_mean": 0.2310771405696869,
"signal/frontier_coverage_20/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_20/group_std_mean": 0.29875036478042605,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_20/weight": 0.0163407064974308,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_25/centered_abs_mean": 0.2310771405696869,
"signal/frontier_coverage_25/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_25/group_std_mean": 0.29875036478042605,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_25/weight": 0.0163407064974308,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_5/centered_abs_mean": 0.2310771405696869,
"signal/frontier_coverage_5/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_5/group_std_mean": 0.29875036478042605,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_coverage_5/weight": 0.0163407064974308,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037761200219392776,
"signal/frontier_ece_reward/centered_abs_mean": 0.04328928515315056,
"signal/frontier_ece_reward/group_bin_occupancy": 0.728125,
"signal/frontier_ece_reward/group_std_mean": 0.0567799873650074,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005302841123193502,
"signal/frontier_ece_reward/weight": 0.1224995955824852,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005302841123193502,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07928718775510787,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.621875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10165912210941315,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015409917011857033,
"signal/frontier_entropy_batch_reward/weight": 0.1944468140602112,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015409917011857033,
"step": 75
},
{
"adaptive_ema/accuracy_reward": 0.3313964150995795,
"adaptive_ema/brier_reward": 0.5559265972898461,
"adaptive_ema/format_reward": 0.830832507451503,
"adaptive_ema/frontier_aurc_reward": 0.16262213049660174,
"adaptive_ema/frontier_coverage_1": 0.2263081342605311,
"adaptive_ema/frontier_coverage_10": 0.2263081342605311,
"adaptive_ema/frontier_coverage_15": 0.2263081342605311,
"adaptive_ema/frontier_coverage_20": 0.2263081342605311,
"adaptive_ema/frontier_coverage_25": 0.2263081342605311,
"adaptive_ema/frontier_coverage_5": 0.2263081342605311,
"adaptive_ema/frontier_ece_reward": 0.16915048062563032,
"adaptive_ema/frontier_entropy_batch_reward": -0.2940994524556769,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.06624611765146256,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01788846291601658,
"adaptive_weight/frontier_coverage_1": 0.016527966037392615,
"adaptive_weight/frontier_coverage_10": 0.016527966037392615,
"adaptive_weight/frontier_coverage_15": 0.016527966037392615,
"adaptive_weight/frontier_coverage_20": 0.016527966037392615,
"adaptive_weight/frontier_coverage_25": 0.016527966037392615,
"adaptive_weight/frontier_coverage_5": 0.016527966037392615,
"adaptive_weight/frontier_ece_reward": 0.12394552230834961,
"adaptive_weight/frontier_entropy_batch_reward": 0.19305209517478944,
"calibration/aurc": 0.31895930979708764,
"calibration/batch_distribution_entropy": 0.9628895716104496,
"calibration/batch_entropy_100bins": 0.6962293359508783,
"calibration/batch_entropy_10bins": 0.9628895716104496,
"calibration/batch_entropy_50bins": 0.7978527193826969,
"calibration/batch_uniqueness": 0.8968208650016194,
"calibration/buffer_distribution_entropy": 0.948261085906984,
"calibration/buffer_entropy_100bins": 0.6780566979310011,
"calibration/buffer_entropy_10bins": 0.948261085906984,
"calibration/buffer_entropy_50bins": 0.778304443963483,
"calibration/confidence_entropy": 0.4647877319490812,
"calibration/coverage@0%": 0.005078125,
"calibration/coverage@1%": 0.005078125,
"calibration/coverage@10%": 0.08711778375733856,
"calibration/coverage@15%": 0.19024660591976517,
"calibration/coverage@20%": 0.28438723091976514,
"calibration/coverage@25%": 0.39454348091976515,
"calibration/coverage@30%": 0.4867539138943249,
"calibration/coverage@5%": 0.0140625,
"calibration/ece": 0.13742734787793545,
"calibration/mean_confidence": 0.4927926374449608,
"calibration/prompt_uniqueness": 0.7803107318223205,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1155.6,
"completions/max_terminated_length": 555.8,
"completions/mean_length": 191.6619140625,
"completions/mean_terminated_length": 191.13605041503905,
"completions/min_length": 76.8,
"completions/min_terminated_length": 76.8,
"epoch": 0.256,
"grad_norm": 0.001474651973694563,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 269792230.0,
"reward": 0.8049997210502624,
"reward_std": 0.10463125705718994,
"rewards/accuracy_reward": 0.5009765625,
"rewards/brier_reward": 0.7641858220100403,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0030391468200832604,
"rewards/frontier_coverage_1": 0.10821435190737247,
"rewards/frontier_coverage_10": 0.10821435190737247,
"rewards/frontier_coverage_15": 0.10821435190737247,
"rewards/frontier_coverage_20": 0.10821435190737247,
"rewards/frontier_coverage_25": 0.10821435190737247,
"rewards/frontier_coverage_5": 0.10821435190737247,
"rewards/frontier_ece_reward": 0.020147581398487092,
"rewards/frontier_entropy_batch_reward": -0.0477634958922863,
"signal/accuracy_reward/centered_abs_mean": 0.14019775390625,
"signal/accuracy_reward/group_bin_occupancy": 0.18671875,
"signal/accuracy_reward/group_std_mean": 0.17916424572467804,
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.070098876953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.070098876953125,
"signal/advantage_abs_mean": 0.08192529529333115,
"signal/advantage_pre_scale_abs_mean": 0.08192529529333115,
"signal/advantage_pre_scale_std": 0.12622717320919036,
"signal/advantage_std": 0.12622717320919036,
"signal/brier_reward/centered_abs_mean": 0.17578245997428893,
"signal/brier_reward/group_bin_occupancy": 0.8453125,
"signal/brier_reward/group_std_mean": 0.22181777954101561,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01164692472666502,
"signal/brier_reward/weight": 0.06624611765146256,
"signal/brier_reward/weighted_centered_abs_mean": 0.01164692472666502,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027812632266432046,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.729296875,
"signal/frontier_aurc_reward/group_std_mean": 0.004300047783181072,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.974574912921526e-05,
"signal/frontier_aurc_reward/weight": 0.01788846291601658,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.974574912921526e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2263425648212433,
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
"signal/frontier_coverage_1/group_std_mean": 0.28937026858329773,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_1/weight": 0.016527966037392615,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_10/centered_abs_mean": 0.2263425648212433,
"signal/frontier_coverage_10/group_bin_occupancy": 0.875,
"signal/frontier_coverage_10/group_std_mean": 0.28937026858329773,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_10/weight": 0.016527966037392615,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_15/centered_abs_mean": 0.2263425648212433,
"signal/frontier_coverage_15/group_bin_occupancy": 0.875,
"signal/frontier_coverage_15/group_std_mean": 0.28937026858329773,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_15/weight": 0.016527966037392615,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_20/centered_abs_mean": 0.2263425648212433,
"signal/frontier_coverage_20/group_bin_occupancy": 0.875,
"signal/frontier_coverage_20/group_std_mean": 0.28937026858329773,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_20/weight": 0.016527966037392615,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_25/centered_abs_mean": 0.2263425648212433,
"signal/frontier_coverage_25/group_bin_occupancy": 0.875,
"signal/frontier_coverage_25/group_std_mean": 0.28937026858329773,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_25/weight": 0.016527966037392615,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_5/centered_abs_mean": 0.2263425648212433,
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
"signal/frontier_coverage_5/group_std_mean": 0.28937026858329773,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_coverage_5/weight": 0.016527966037392615,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037405529990792276,
"signal/frontier_ece_reward/centered_abs_mean": 0.04063420295715332,
"signal/frontier_ece_reward/group_bin_occupancy": 0.70859375,
"signal/frontier_ece_reward/group_std_mean": 0.05336618795990944,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005035725235939026,
"signal/frontier_ece_reward/weight": 0.12394552230834961,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005035725235939026,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08213324025273323,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.59765625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10542523190379142,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015855711698532105,
"signal/frontier_entropy_batch_reward/weight": 0.19305209517478944,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015855711698532105,
"step": 80
},
{
"adaptive_ema/accuracy_reward": 0.3397445289155506,
"adaptive_ema/brier_reward": 0.5661552175485272,
"adaptive_ema/format_reward": 0.8390983751782162,
"adaptive_ema/frontier_aurc_reward": 0.1545018851710344,
"adaptive_ema/frontier_coverage_1": 0.22056885627218503,
"adaptive_ema/frontier_coverage_10": 0.22056885627218503,
"adaptive_ema/frontier_coverage_15": 0.22056885627218503,
"adaptive_ema/frontier_coverage_20": 0.22056885627218503,
"adaptive_ema/frontier_coverage_25": 0.22056885627218503,
"adaptive_ema/frontier_coverage_5": 0.22056885627218503,
"adaptive_ema/frontier_ece_reward": 0.16177079064652503,
"adaptive_ema/frontier_entropy_batch_reward": -0.28177420146855825,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.06489593386650086,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01811096929013729,
"adaptive_weight/frontier_coverage_1": 0.01669577695429325,
"adaptive_weight/frontier_coverage_10": 0.01669577695429325,
"adaptive_weight/frontier_coverage_15": 0.01669577695429325,
"adaptive_weight/frontier_coverage_20": 0.01669577695429325,
"adaptive_weight/frontier_coverage_25": 0.01669577695429325,
"adaptive_weight/frontier_coverage_5": 0.01669577695429325,
"adaptive_weight/frontier_ece_reward": 0.1253859281539917,
"adaptive_weight/frontier_entropy_batch_reward": 0.1917325049638748,
"calibration/aurc": 0.3813991127365959,
"calibration/batch_distribution_entropy": 0.9642356723108738,
"calibration/batch_entropy_100bins": 0.6876133130751552,
"calibration/batch_entropy_10bins": 0.9642356723108738,
"calibration/batch_entropy_50bins": 0.7879933270345396,
"calibration/batch_uniqueness": 0.8944073325171974,
"calibration/buffer_distribution_entropy": 0.9506694534538565,
"calibration/buffer_entropy_100bins": 0.6816668870467762,
"calibration/buffer_entropy_10bins": 0.9506694534538565,
"calibration/buffer_entropy_50bins": 0.7816761689320492,
"calibration/confidence_entropy": 0.485735322011209,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.055078125,
"calibration/coverage@15%": 0.1421875,
"calibration/coverage@20%": 0.2015625,
"calibration/coverage@25%": 0.2703125,
"calibration/coverage@30%": 0.3711755442759296,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1416843897730325,
"calibration/mean_confidence": 0.48195591092772727,
"calibration/prompt_uniqueness": 0.7607490976196669,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1346.6,
"completions/max_terminated_length": 528.0,
"completions/mean_length": 199.40859375,
"completions/mean_terminated_length": 198.88611450195313,
"completions/min_length": 78.8,
"completions/min_terminated_length": 78.8,
"epoch": 0.272,
"grad_norm": 0.001055843778885901,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 286799870.0,
"reward": 0.8040920615196228,
"reward_std": 0.0994871512055397,
"rewards/accuracy_reward": 0.494921875,
"rewards/brier_reward": 0.7610333204269409,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.0030955026391893624,
"rewards/frontier_coverage_1": 0.10903444737195969,
"rewards/frontier_coverage_10": 0.10903444737195969,
"rewards/frontier_coverage_15": 0.10903444737195969,
"rewards/frontier_coverage_20": 0.10903444737195969,
"rewards/frontier_coverage_25": 0.10903444737195969,
"rewards/frontier_coverage_5": 0.10903444737195969,
"rewards/frontier_ece_reward": 0.016309389285743235,
"rewards/frontier_entropy_batch_reward": -0.028366550896316768,
"signal/accuracy_reward/centered_abs_mean": 0.13192138671875,
"signal/accuracy_reward/group_bin_occupancy": 0.183984375,
"signal/accuracy_reward/group_std_mean": 0.16993003189563752,
"signal/accuracy_reward/group_zero_std_frac": 0.528125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065960693359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.065960693359375,
"signal/advantage_abs_mean": 0.0777826264500618,
"signal/advantage_pre_scale_abs_mean": 0.0777826264500618,
"signal/advantage_pre_scale_std": 0.1212904393672943,
"signal/advantage_std": 0.1212904393672943,
"signal/brier_reward/centered_abs_mean": 0.1730465292930603,
"signal/brier_reward/group_bin_occupancy": 0.858984375,
"signal/brier_reward/group_std_mean": 0.21734442710876464,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011229727230966092,
"signal/brier_reward/weight": 0.06489593386650086,
"signal/brier_reward/weighted_centered_abs_mean": 0.011229727230966092,
"signal/format_reward/centered_abs_mean": 0.001513671875,
"signal/format_reward/group_bin_occupancy": 0.128125,
"signal/format_reward/group_std_mean": 0.004419417306780815,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026437715161591767,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.744140625,
"signal/frontier_aurc_reward/group_std_mean": 0.0040718474425375465,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.787545258295722e-05,
"signal/frontier_aurc_reward/weight": 0.01811096929013729,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.787545258295722e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.23006677329540254,
"signal/frontier_coverage_1/group_bin_occupancy": 0.890625,
"signal/frontier_coverage_1/group_std_mean": 0.2924661636352539,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_1/weight": 0.01669577695429325,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_10/centered_abs_mean": 0.23006677329540254,
"signal/frontier_coverage_10/group_bin_occupancy": 0.890625,
"signal/frontier_coverage_10/group_std_mean": 0.2924661636352539,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_10/weight": 0.01669577695429325,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_15/centered_abs_mean": 0.23006677329540254,
"signal/frontier_coverage_15/group_bin_occupancy": 0.890625,
"signal/frontier_coverage_15/group_std_mean": 0.2924661636352539,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_15/weight": 0.01669577695429325,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_20/centered_abs_mean": 0.23006677329540254,
"signal/frontier_coverage_20/group_bin_occupancy": 0.890625,
"signal/frontier_coverage_20/group_std_mean": 0.2924661636352539,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_20/weight": 0.01669577695429325,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_25/centered_abs_mean": 0.23006677329540254,
"signal/frontier_coverage_25/group_bin_occupancy": 0.890625,
"signal/frontier_coverage_25/group_std_mean": 0.2924661636352539,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_25/weight": 0.01669577695429325,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_5/centered_abs_mean": 0.23006677329540254,
"signal/frontier_coverage_5/group_bin_occupancy": 0.890625,
"signal/frontier_coverage_5/group_std_mean": 0.2924661636352539,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_coverage_5/weight": 0.01669577695429325,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038412095978856086,
"signal/frontier_ece_reward/centered_abs_mean": 0.03505429700016975,
"signal/frontier_ece_reward/group_bin_occupancy": 0.70234375,
"signal/frontier_ece_reward/group_std_mean": 0.04616514593362808,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004394615720957518,
"signal/frontier_ece_reward/weight": 0.1253859281539917,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004394615720957518,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05788676589727402,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.588671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.07604653090238571,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011109101679176092,
"signal/frontier_entropy_batch_reward/weight": 0.1917325049638748,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011109101679176092,
"step": 85
},
{
"adaptive_ema/accuracy_reward": 0.34651812406479604,
"adaptive_ema/brier_reward": 0.5753681688750691,
"adaptive_ema/format_reward": 0.84693925290468,
"adaptive_ema/frontier_aurc_reward": 0.1467695300595397,
"adaptive_ema/frontier_coverage_1": 0.21532816272022975,
"adaptive_ema/frontier_coverage_10": 0.21532816272022975,
"adaptive_ema/frontier_coverage_15": 0.21532816272022975,
"adaptive_ema/frontier_coverage_20": 0.21532816272022975,
"adaptive_ema/frontier_coverage_25": 0.21532816272022975,
"adaptive_ema/frontier_coverage_5": 0.21532816272022975,
"adaptive_ema/frontier_ece_reward": 0.15449289534769098,
"adaptive_ema/frontier_entropy_batch_reward": -0.26877567477837994,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.06369541734457015,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.018327684327960014,
"adaptive_weight/frontier_coverage_1": 0.016855016723275183,
"adaptive_weight/frontier_coverage_10": 0.016855016723275183,
"adaptive_weight/frontier_coverage_15": 0.016855016723275183,
"adaptive_weight/frontier_coverage_20": 0.016855016723275183,
"adaptive_weight/frontier_coverage_25": 0.016855016723275183,
"adaptive_weight/frontier_coverage_5": 0.016855016723275183,
"adaptive_weight/frontier_ece_reward": 0.1268280863761902,
"adaptive_weight/frontier_entropy_batch_reward": 0.19031870663166045,
"calibration/aurc": 0.37261471884124653,
"calibration/batch_distribution_entropy": 0.9750497694198141,
"calibration/batch_entropy_100bins": 0.6965538382719203,
"calibration/batch_entropy_10bins": 0.9750497694198141,
"calibration/batch_entropy_50bins": 0.8007210950717083,
"calibration/batch_uniqueness": 0.9004279470884367,
"calibration/buffer_distribution_entropy": 0.9530347440047425,
"calibration/buffer_entropy_100bins": 0.6844022960456384,
"calibration/buffer_entropy_10bins": 0.9530347440047425,
"calibration/buffer_entropy_50bins": 0.7844857201320607,
"calibration/confidence_entropy": 0.4903165946191407,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.015625,
"calibration/coverage@15%": 0.050390625,
"calibration/coverage@20%": 0.093359375,
"calibration/coverage@25%": 0.149609375,
"calibration/coverage@30%": 0.28918480919765166,
"calibration/coverage@5%": 0.015625,
"calibration/ece": 0.11769263418313192,
"calibration/mean_confidence": 0.4960635546617686,
"calibration/prompt_uniqueness": 0.7969746886381374,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1159.0,
"completions/max_terminated_length": 560.0,
"completions/mean_length": 193.3376953125,
"completions/mean_terminated_length": 192.68275756835936,
"completions/min_length": 83.4,
"completions/min_terminated_length": 83.4,
"epoch": 0.288,
"grad_norm": 0.0019680638797581196,
"learning_rate": 1e-06,
"loss": 0.0016,
"num_tokens": 303737824.0,
"reward": 0.8043852686882019,
"reward_std": 0.10160344392061234,
"rewards/accuracy_reward": 0.49619140625,
"rewards/brier_reward": 0.7544908285140991,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0031197931617498397,
"rewards/frontier_coverage_1": 0.10280315726995468,
"rewards/frontier_coverage_10": 0.10280315726995468,
"rewards/frontier_coverage_15": 0.10280315726995468,
"rewards/frontier_coverage_20": 0.10280315726995468,
"rewards/frontier_coverage_25": 0.10280315726995468,
"rewards/frontier_coverage_5": 0.10280315726995468,
"rewards/frontier_ece_reward": 0.014545264653861523,
"rewards/frontier_entropy_batch_reward": -0.019750583730638028,
"signal/accuracy_reward/centered_abs_mean": 0.139532470703125,
"signal/accuracy_reward/group_bin_occupancy": 0.1890625,
"signal/accuracy_reward/group_std_mean": 0.18288062512874603,
"signal/accuracy_reward/group_zero_std_frac": 0.4875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0697662353515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0697662353515625,
"signal/advantage_abs_mean": 0.07806335389614105,
"signal/advantage_pre_scale_abs_mean": 0.07806335389614105,
"signal/advantage_pre_scale_std": 0.12149789035320283,
"signal/advantage_std": 0.12149789035320283,
"signal/brier_reward/centered_abs_mean": 0.17784371674060823,
"signal/brier_reward/group_bin_occupancy": 0.846484375,
"signal/brier_reward/group_std_mean": 0.22363564372062683,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011328470706939698,
"signal/brier_reward/weight": 0.06369541734457015,
"signal/brier_reward/weighted_centered_abs_mean": 0.011328470706939698,
"signal/format_reward/centered_abs_mean": 0.001312255859375,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0035306816454976795,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024917138274759055,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.736328125,
"signal/frontier_aurc_reward/group_std_mean": 0.0037776767276227474,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.565996496239677e-05,
"signal/frontier_aurc_reward/weight": 0.018327684327960014,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.565996496239677e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.23597144186496735,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_1/group_std_mean": 0.30281777381896974,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_1/weight": 0.016855016723275183,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_10/centered_abs_mean": 0.23597144186496735,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_10/group_std_mean": 0.30281777381896974,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_10/weight": 0.016855016723275183,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_15/centered_abs_mean": 0.23597144186496735,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_15/group_std_mean": 0.30281777381896974,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_15/weight": 0.016855016723275183,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_20/centered_abs_mean": 0.23597144186496735,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_20/group_std_mean": 0.30281777381896974,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_20/weight": 0.016855016723275183,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_25/centered_abs_mean": 0.23597144186496735,
"signal/frontier_coverage_25/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_25/group_std_mean": 0.30281777381896974,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_25/weight": 0.016855016723275183,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_5/centered_abs_mean": 0.23597144186496735,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_5/group_std_mean": 0.30281777381896974,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_coverage_5/weight": 0.016855016723275183,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039774681441485885,
"signal/frontier_ece_reward/centered_abs_mean": 0.033656676113605496,
"signal/frontier_ece_reward/group_bin_occupancy": 0.679296875,
"signal/frontier_ece_reward/group_std_mean": 0.04492291808128357,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004268449451774359,
"signal/frontier_ece_reward/weight": 0.1268280863761902,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004268449451774359,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.04934029281139374,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.593359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.06619658097624778,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009388817101716995,
"signal/frontier_entropy_batch_reward/weight": 0.19031870663166045,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009388817101716995,
"step": 90
},
{
"adaptive_ema/accuracy_reward": 0.3547448487288868,
"adaptive_ema/brier_reward": 0.583940797601032,
"adaptive_ema/format_reward": 0.8544245276978717,
"adaptive_ema/frontier_aurc_reward": 0.13943217269215363,
"adaptive_ema/frontier_coverage_1": 0.20918630842104977,
"adaptive_ema/frontier_coverage_10": 0.20918630842104977,
"adaptive_ema/frontier_coverage_15": 0.20918630842104977,
"adaptive_ema/frontier_coverage_20": 0.20918630842104977,
"adaptive_ema/frontier_coverage_25": 0.20918630842104977,
"adaptive_ema/frontier_coverage_5": 0.20918630842104977,
"adaptive_ema/frontier_ece_reward": 0.14765743845246337,
"adaptive_ema/frontier_entropy_batch_reward": -0.25743292047942407,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.06253615096211433,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.018522783368825912,
"adaptive_weight/frontier_coverage_1": 0.01702139526605606,
"adaptive_weight/frontier_coverage_10": 0.01702139526605606,
"adaptive_weight/frontier_coverage_15": 0.01702139526605606,
"adaptive_weight/frontier_coverage_20": 0.01702139526605606,
"adaptive_weight/frontier_coverage_25": 0.01702139526605606,
"adaptive_weight/frontier_coverage_5": 0.01702139526605606,
"adaptive_weight/frontier_ece_reward": 0.1281127244234085,
"adaptive_weight/frontier_entropy_batch_reward": 0.18899996280670167,
"calibration/aurc": 0.31297741273515317,
"calibration/batch_distribution_entropy": 0.9676274846939196,
"calibration/batch_entropy_100bins": 0.7031356835430308,
"calibration/batch_entropy_10bins": 0.9676274846939196,
"calibration/batch_entropy_50bins": 0.8033018250346864,
"calibration/batch_uniqueness": 0.8989820714622715,
"calibration/buffer_distribution_entropy": 0.9552243834756542,
"calibration/buffer_entropy_100bins": 0.687498391590213,
"calibration/buffer_entropy_10bins": 0.9552243834756542,
"calibration/buffer_entropy_50bins": 0.7874907977014229,
"calibration/confidence_entropy": 0.48079966853899203,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0794138331702544,
"calibration/coverage@15%": 0.16190909368884537,
"calibration/coverage@20%": 0.21037869985322896,
"calibration/coverage@25%": 0.38351807118395304,
"calibration/coverage@30%": 0.5058372064579256,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.13226369381293737,
"calibration/mean_confidence": 0.5072106308874051,
"calibration/prompt_uniqueness": 0.7839407802744537,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1068.2,
"completions/max_terminated_length": 860.4,
"completions/mean_length": 194.43525390625,
"completions/mean_terminated_length": 194.04138793945313,
"completions/min_length": 82.6,
"completions/min_terminated_length": 82.6,
"epoch": 0.304,
"grad_norm": 0.0018993834964931011,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 320658793.0,
"reward": 0.8003029823303223,
"reward_std": 0.09959358870983123,
"rewards/accuracy_reward": 0.49970703125,
"rewards/brier_reward": 0.7468193769454956,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003175217052921653,
"rewards/frontier_coverage_1": 0.09870433807373047,
"rewards/frontier_coverage_10": 0.09870433807373047,
"rewards/frontier_coverage_15": 0.09870433807373047,
"rewards/frontier_coverage_20": 0.09870433807373047,
"rewards/frontier_coverage_25": 0.09870433807373047,
"rewards/frontier_coverage_5": 0.09870433807373047,
"rewards/frontier_ece_reward": 0.012848987244069576,
"rewards/frontier_entropy_batch_reward": -0.041863073408603665,
"signal/accuracy_reward/centered_abs_mean": 0.133673095703125,
"signal/accuracy_reward/group_bin_occupancy": 0.18671875,
"signal/accuracy_reward/group_std_mean": 0.1743601679801941,
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0668365478515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0668365478515625,
"signal/advantage_abs_mean": 0.07773177325725555,
"signal/advantage_pre_scale_abs_mean": 0.07773177325725555,
"signal/advantage_pre_scale_std": 0.11809686571359634,
"signal/advantage_std": 0.11809686571359634,
"signal/brier_reward/centered_abs_mean": 0.1809590458869934,
"signal/brier_reward/group_bin_occupancy": 0.85234375,
"signal/brier_reward/group_std_mean": 0.22669825851917266,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011314875073730945,
"signal/brier_reward/weight": 0.06253615096211433,
"signal/brier_reward/weighted_centered_abs_mean": 0.011314875073730945,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025466226506978273,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375,
"signal/frontier_aurc_reward/group_std_mean": 0.0038951355498284103,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.718490381492302e-05,
"signal/frontier_aurc_reward/weight": 0.018522783368825912,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.718490381492302e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.24377259016036987,
"signal/frontier_coverage_1/group_bin_occupancy": 0.878515625,
"signal/frontier_coverage_1/group_std_mean": 0.31025264263153074,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_1/weight": 0.01702139526605606,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_10/centered_abs_mean": 0.24377259016036987,
"signal/frontier_coverage_10/group_bin_occupancy": 0.878515625,
"signal/frontier_coverage_10/group_std_mean": 0.31025264263153074,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_10/weight": 0.01702139526605606,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_15/centered_abs_mean": 0.24377259016036987,
"signal/frontier_coverage_15/group_bin_occupancy": 0.878515625,
"signal/frontier_coverage_15/group_std_mean": 0.31025264263153074,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_15/weight": 0.01702139526605606,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_20/centered_abs_mean": 0.24377259016036987,
"signal/frontier_coverage_20/group_bin_occupancy": 0.878515625,
"signal/frontier_coverage_20/group_std_mean": 0.31025264263153074,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_20/weight": 0.01702139526605606,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_25/centered_abs_mean": 0.24377259016036987,
"signal/frontier_coverage_25/group_bin_occupancy": 0.878515625,
"signal/frontier_coverage_25/group_std_mean": 0.31025264263153074,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_25/weight": 0.01702139526605606,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_5/centered_abs_mean": 0.24377259016036987,
"signal/frontier_coverage_5/group_bin_occupancy": 0.878515625,
"signal/frontier_coverage_5/group_std_mean": 0.31025264263153074,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_coverage_5/weight": 0.01702139526605606,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004149554390460252,
"signal/frontier_ece_reward/centered_abs_mean": 0.03277038559317589,
"signal/frontier_ece_reward/group_bin_occupancy": 0.675,
"signal/frontier_ece_reward/group_std_mean": 0.04338055402040482,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004198975954204797,
"signal/frontier_ece_reward/weight": 0.1281127244234085,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004198975954204797,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07316073104739189,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.58046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09631870687007904,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013831710256636143,
"signal/frontier_entropy_batch_reward/weight": 0.18899996280670167,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013831710256636143,
"step": 95
},
{
"adaptive_ema/accuracy_reward": 0.3615936058711512,
"adaptive_ema/brier_reward": 0.5923757457391803,
"adaptive_ema/format_reward": 0.8615380862676577,
"adaptive_ema/frontier_aurc_reward": 0.13243977950769706,
"adaptive_ema/frontier_coverage_1": 0.20445407942527577,
"adaptive_ema/frontier_coverage_10": 0.20445407942527577,
"adaptive_ema/frontier_coverage_15": 0.20445407942527577,
"adaptive_ema/frontier_coverage_20": 0.20445407942527577,
"adaptive_ema/frontier_coverage_25": 0.20445407942527577,
"adaptive_ema/frontier_coverage_5": 0.20445407942527577,
"adaptive_ema/frontier_ece_reward": 0.14114867916699728,
"adaptive_ema/frontier_entropy_batch_reward": -0.24705242028494584,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.06140188053250313,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01871398314833641,
"adaptive_weight/frontier_coverage_1": 0.01716057360172272,
"adaptive_weight/frontier_coverage_10": 0.01716057360172272,
"adaptive_weight/frontier_coverage_15": 0.01716057360172272,
"adaptive_weight/frontier_coverage_20": 0.01716057360172272,
"adaptive_weight/frontier_coverage_25": 0.01716057360172272,
"adaptive_weight/frontier_coverage_5": 0.01716057360172272,
"adaptive_weight/frontier_ece_reward": 0.12937237024307252,
"adaptive_weight/frontier_entropy_batch_reward": 0.18784832060337067,
"calibration/aurc": 0.26604642089660946,
"calibration/batch_distribution_entropy": 0.9707172276846515,
"calibration/batch_entropy_100bins": 0.7014501595964525,
"calibration/batch_entropy_10bins": 0.9707172276846515,
"calibration/batch_entropy_50bins": 0.801622235231511,
"calibration/batch_uniqueness": 0.9017338094976054,
"calibration/buffer_distribution_entropy": 0.9568832194697796,
"calibration/buffer_entropy_100bins": 0.6906226373657561,
"calibration/buffer_entropy_10bins": 0.9568832194697796,
"calibration/buffer_entropy_50bins": 0.7903461780151347,
"calibration/confidence_entropy": 0.4662896261667061,
"calibration/coverage@0%": 0.006262230919765166,
"calibration/coverage@1%": 0.006262230919765166,
"calibration/coverage@10%": 0.15318386130136985,
"calibration/coverage@15%": 0.3173610261741683,
"calibration/coverage@20%": 0.4123524645303327,
"calibration/coverage@25%": 0.5018713307240704,
"calibration/coverage@30%": 0.5972449853228963,
"calibration/coverage@5%": 0.016829745596868884,
"calibration/ece": 0.14112387551981412,
"calibration/mean_confidence": 0.5221430184381115,
"calibration/prompt_uniqueness": 0.7770071865244537,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 950.6,
"completions/max_terminated_length": 545.6,
"completions/mean_length": 190.3484375,
"completions/mean_terminated_length": 190.08496704101563,
"completions/min_length": 82.8,
"completions/min_terminated_length": 82.8,
"epoch": 0.32,
"grad_norm": 0.0008083516149781644,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 337696665.0,
"reward": 0.807330322265625,
"reward_std": 0.08853928595781327,
"rewards/accuracy_reward": 0.51435546875,
"rewards/brier_reward": 0.7678199291229248,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0029150643851608036,
"rewards/frontier_coverage_1": 0.11148936003446579,
"rewards/frontier_coverage_10": 0.11148936003446579,
"rewards/frontier_coverage_15": 0.11148936003446579,
"rewards/frontier_coverage_20": 0.11148936003446579,
"rewards/frontier_coverage_25": 0.11148936003446579,
"rewards/frontier_coverage_5": 0.11148936003446579,
"rewards/frontier_ece_reward": 0.019013339094817637,
"rewards/frontier_entropy_batch_reward": -0.057412856817245485,
"signal/accuracy_reward/centered_abs_mean": 0.099041748046875,
"signal/accuracy_reward/group_bin_occupancy": 0.180078125,
"signal/accuracy_reward/group_std_mean": 0.14076800048351287,
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495208740234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0495208740234375,
"signal/advantage_abs_mean": 0.06596728339791298,
"signal/advantage_pre_scale_abs_mean": 0.06596728339791298,
"signal/advantage_pre_scale_std": 0.1075965479016304,
"signal/advantage_std": 0.1075965479016304,
"signal/brier_reward/centered_abs_mean": 0.1712301790714264,
"signal/brier_reward/group_bin_occupancy": 0.833984375,
"signal/brier_reward/group_std_mean": 0.21626271903514863,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010516363568603993,
"signal/brier_reward/weight": 0.06140188053250313,
"signal/brier_reward/weighted_centered_abs_mean": 0.010516363568603993,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026979228015989063,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71640625,
"signal/frontier_aurc_reward/group_std_mean": 0.004028804274275899,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.047983067925088e-05,
"signal/frontier_aurc_reward/weight": 0.01871398314833641,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.047983067925088e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2138041526079178,
"signal/frontier_coverage_1/group_bin_occupancy": 0.853515625,
"signal/frontier_coverage_1/group_std_mean": 0.27440894246101377,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_1/weight": 0.01716057360172272,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_10/centered_abs_mean": 0.2138041526079178,
"signal/frontier_coverage_10/group_bin_occupancy": 0.853515625,
"signal/frontier_coverage_10/group_std_mean": 0.27440894246101377,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_10/weight": 0.01716057360172272,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_15/centered_abs_mean": 0.2138041526079178,
"signal/frontier_coverage_15/group_bin_occupancy": 0.853515625,
"signal/frontier_coverage_15/group_std_mean": 0.27440894246101377,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_15/weight": 0.01716057360172272,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_20/centered_abs_mean": 0.2138041526079178,
"signal/frontier_coverage_20/group_bin_occupancy": 0.853515625,
"signal/frontier_coverage_20/group_std_mean": 0.27440894246101377,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_20/weight": 0.01716057360172272,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_25/centered_abs_mean": 0.2138041526079178,
"signal/frontier_coverage_25/group_bin_occupancy": 0.853515625,
"signal/frontier_coverage_25/group_std_mean": 0.27440894246101377,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_25/weight": 0.01716057360172272,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_5/centered_abs_mean": 0.2138041526079178,
"signal/frontier_coverage_5/group_bin_occupancy": 0.853515625,
"signal/frontier_coverage_5/group_std_mean": 0.27440894246101377,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_coverage_5/weight": 0.01716057360172272,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003668808238580823,
"signal/frontier_ece_reward/centered_abs_mean": 0.035488611459732054,
"signal/frontier_ece_reward/group_bin_occupancy": 0.655078125,
"signal/frontier_ece_reward/group_std_mean": 0.045805665850639346,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004591076914221048,
"signal/frontier_ece_reward/weight": 0.12937237024307252,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004591076914221048,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09487930536270142,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.58828125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.12115364670753478,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0178196107968688,
"signal/frontier_entropy_batch_reward/weight": 0.18784832060337067,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0178196107968688,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.5622950821720393,
"eval_calibration/batch_distribution_entropy": 0.8914133677236195,
"eval_calibration/batch_entropy_100bins": 0.5628644573084215,
"eval_calibration/batch_entropy_10bins": 0.8914133677236195,
"eval_calibration/batch_entropy_50bins": 0.6625949320728552,
"eval_calibration/batch_uniqueness": 0.8203125,
"eval_calibration/buffer_distribution_entropy": 0.9578292174133181,
"eval_calibration/buffer_entropy_100bins": 0.692119963145951,
"eval_calibration/buffer_entropy_10bins": 0.9578292174133181,
"eval_calibration/buffer_entropy_50bins": 0.7917815965358247,
"eval_calibration/confidence_entropy": 0.4636687518786541,
"eval_calibration/coverage@0%": 0.015625,
"eval_calibration/coverage@1%": 0.015625,
"eval_calibration/coverage@10%": 0.015625,
"eval_calibration/coverage@15%": 0.0703125,
"eval_calibration/coverage@20%": 0.0703125,
"eval_calibration/coverage@25%": 0.0859375,
"eval_calibration/coverage@30%": 0.09375,
"eval_calibration/coverage@5%": 0.015625,
"eval_calibration/ece": 0.21585937500000002,
"eval_calibration/mean_confidence": 0.406015625,
"eval_calibration/prompt_uniqueness": 0.8203125,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 632.0,
"eval_completions/max_terminated_length": 331.5,
"eval_completions/mean_length": 192.6338233947754,
"eval_completions/mean_terminated_length": 190.00860977172852,
"eval_completions/min_length": 95.5,
"eval_completions/min_terminated_length": 95.5,
"eval_loss": 0.0,
"eval_num_tokens": 337696665.0,
"eval_reward": 0.7206540256738663,
"eval_reward_std": 0.23302211984992027,
"eval_rewards/accuracy_reward": 0.41015625,
"eval_rewards/brier_reward": 0.7707347571849823,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.0035720185842365026,
"eval_rewards/frontier_coverage_1": 0.19156523048877716,
"eval_rewards/frontier_coverage_10": 0.19156523048877716,
"eval_rewards/frontier_coverage_15": 0.19156523048877716,
"eval_rewards/frontier_coverage_20": 0.19156523048877716,
"eval_rewards/frontier_coverage_25": 0.19156523048877716,
"eval_rewards/frontier_coverage_5": 0.19156523048877716,
"eval_rewards/frontier_ece_reward": 0.013612536480650306,
"eval_rewards/frontier_entropy_batch_reward": -0.2766956575214863,
"eval_runtime": 28.3301,
"eval_samples_per_second": 17.649,
"eval_signal/accuracy_reward/centered_abs_mean": 0.474853515625,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49471620470285416,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2374267578125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2374267578125,
"eval_signal/advantage_abs_mean": 0.2067079357802868,
"eval_signal/advantage_pre_scale_abs_mean": 0.2067079357802868,
"eval_signal/advantage_pre_scale_std": 0.2311190329492092,
"eval_signal/advantage_std": 0.2311190329492092,
"eval_signal/brier_reward/centered_abs_mean": 0.21524429693818092,
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
"eval_signal/brier_reward/group_std_mean": 0.26688605546951294,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013107528910040855,
"eval_signal/brier_reward/weight": 0.06089605763554573,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.013107528910040855,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_bin_occupancy": 0.1328125,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003758925129659474,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.78125,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0061155634466558695,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.062661461532116e-05,
"eval_signal/frontier_aurc_reward/weight": 0.01878904551267624,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.062661461532116e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3889065384864807,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_1/group_std_mean": 0.47960302233695984,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_1/weight": 0.017216749489307404,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3889065384864807,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_10/group_std_mean": 0.47960302233695984,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_10/weight": 0.017216749489307404,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3889065384864807,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_15/group_std_mean": 0.47960302233695984,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_15/weight": 0.017216749489307404,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3889065384864807,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_20/group_std_mean": 0.47960302233695984,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_20/weight": 0.017216749489307404,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3889065384864807,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_25/group_std_mean": 0.47960302233695984,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_25/weight": 0.017216749489307404,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3889065384864807,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_5/group_std_mean": 0.47960302233695984,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_coverage_5/weight": 0.017216749489307404,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006695706397294998,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03698669094592333,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8046875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.055258942767977715,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004802606999874115,
"eval_signal/frontier_ece_reward/weight": 0.12984690070152283,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004802606999874115,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3310266584157944,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.6015625,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.38709257543087006,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.062056735157966614,
"eval_signal/frontier_entropy_batch_reward/weight": 0.18746748566627502,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.062056735157966614,
"eval_steps_per_second": 0.141,
"step": 100
},
{
"adaptive_ema/accuracy_reward": 0.3691240030507804,
"adaptive_ema/brier_reward": 0.6010773515502917,
"adaptive_ema/format_reward": 0.8683059009886822,
"adaptive_ema/frontier_aurc_reward": 0.1258143913355397,
"adaptive_ema/frontier_coverage_1": 0.19983967871364694,
"adaptive_ema/frontier_coverage_10": 0.19983967871364694,
"adaptive_ema/frontier_coverage_15": 0.19983967871364694,
"adaptive_ema/frontier_coverage_20": 0.19983967871364694,
"adaptive_ema/frontier_coverage_25": 0.19983967871364694,
"adaptive_ema/frontier_coverage_5": 0.19983967871364694,
"adaptive_ema/frontier_ece_reward": 0.13516118757960247,
"adaptive_ema/frontier_entropy_batch_reward": -0.23766472191903443,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.06022153198719025,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01889778971672058,
"adaptive_weight/frontier_coverage_1": 0.017297543585300446,
"adaptive_weight/frontier_coverage_10": 0.017297543585300446,
"adaptive_weight/frontier_coverage_15": 0.017297543585300446,
"adaptive_weight/frontier_coverage_20": 0.017297543585300446,
"adaptive_weight/frontier_coverage_25": 0.017297543585300446,
"adaptive_weight/frontier_coverage_5": 0.017297543585300446,
"adaptive_weight/frontier_ece_reward": 0.1305568039417267,
"adaptive_weight/frontier_entropy_batch_reward": 0.18683860898017884,
"calibration/aurc": 0.3309582655250696,
"calibration/batch_distribution_entropy": 0.9672548223786464,
"calibration/batch_entropy_100bins": 0.6964700945293234,
"calibration/batch_entropy_10bins": 0.9672548223786464,
"calibration/batch_entropy_50bins": 0.7965249397583692,
"calibration/batch_uniqueness": 0.8961151123046875,
"calibration/buffer_distribution_entropy": 0.9592387952294402,
"calibration/buffer_entropy_100bins": 0.6936079623072494,
"calibration/buffer_entropy_10bins": 0.9592387952294402,
"calibration/buffer_entropy_50bins": 0.7931674661918849,
"calibration/confidence_entropy": 0.48189768326767374,
"calibration/coverage@0%": 0.005859375,
"calibration/coverage@1%": 0.005859375,
"calibration/coverage@10%": 0.006640625,
"calibration/coverage@15%": 0.081640625,
"calibration/coverage@20%": 0.177734375,
"calibration/coverage@25%": 0.26640625,
"calibration/coverage@30%": 0.430859375,
"calibration/coverage@5%": 0.005859375,
"calibration/ece": 0.13445703125000003,
"calibration/mean_confidence": 0.47684765625000003,
"calibration/prompt_uniqueness": 0.775341796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 749.4,
"completions/max_terminated_length": 543.6,
"completions/mean_length": 189.60478515625,
"completions/mean_terminated_length": 189.47372741699218,
"completions/min_length": 81.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.336,
"grad_norm": 0.000987388426437974,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 354360650.0,
"reward": 0.80974360704422,
"reward_std": 0.09494156986474991,
"rewards/accuracy_reward": 0.52021484375,
"rewards/brier_reward": 0.7602529168128968,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0028106594923883676,
"rewards/frontier_coverage_1": 0.09387856647372246,
"rewards/frontier_coverage_10": 0.09387856647372246,
"rewards/frontier_coverage_15": 0.09387856647372246,
"rewards/frontier_coverage_20": 0.09387856647372246,
"rewards/frontier_coverage_25": 0.09387856647372246,
"rewards/frontier_coverage_5": 0.09387856647372246,
"rewards/frontier_ece_reward": 0.016277409344911575,
"rewards/frontier_entropy_batch_reward": -0.04225642457604408,
"signal/accuracy_reward/centered_abs_mean": 0.124945068359375,
"signal/accuracy_reward/group_bin_occupancy": 0.18671875,
"signal/accuracy_reward/group_std_mean": 0.16898487508296967,
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0624725341796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0624725341796875,
"signal/advantage_abs_mean": 0.07221008986234664,
"signal/advantage_pre_scale_abs_mean": 0.07221008986234664,
"signal/advantage_pre_scale_std": 0.11600485146045685,
"signal/advantage_std": 0.11600485146045685,
"signal/brier_reward/centered_abs_mean": 0.16997200548648833,
"signal/brier_reward/group_bin_occupancy": 0.84453125,
"signal/brier_reward/group_std_mean": 0.2162840783596039,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01023493316024542,
"signal/brier_reward/weight": 0.06022153198719025,
"signal/brier_reward/weighted_centered_abs_mean": 0.01023493316024542,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002428735839203,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728515625,
"signal/frontier_aurc_reward/group_std_mean": 0.0036500558257102967,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5901389239588755e-05,
"signal/frontier_aurc_reward/weight": 0.01889778971672058,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5901389239588755e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22394213676452637,
"signal/frontier_coverage_1/group_bin_occupancy": 0.858984375,
"signal/frontier_coverage_1/group_std_mean": 0.28910828232765196,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_1/weight": 0.017297543585300446,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_10/centered_abs_mean": 0.22394213676452637,
"signal/frontier_coverage_10/group_bin_occupancy": 0.858984375,
"signal/frontier_coverage_10/group_std_mean": 0.28910828232765196,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_10/weight": 0.017297543585300446,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_15/centered_abs_mean": 0.22394213676452637,
"signal/frontier_coverage_15/group_bin_occupancy": 0.858984375,
"signal/frontier_coverage_15/group_std_mean": 0.28910828232765196,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_15/weight": 0.017297543585300446,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_20/centered_abs_mean": 0.22394213676452637,
"signal/frontier_coverage_20/group_bin_occupancy": 0.858984375,
"signal/frontier_coverage_20/group_std_mean": 0.28910828232765196,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_20/weight": 0.017297543585300446,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_25/centered_abs_mean": 0.22394213676452637,
"signal/frontier_coverage_25/group_bin_occupancy": 0.858984375,
"signal/frontier_coverage_25/group_std_mean": 0.28910828232765196,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_25/weight": 0.017297543585300446,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_5/centered_abs_mean": 0.22394213676452637,
"signal/frontier_coverage_5/group_bin_occupancy": 0.858984375,
"signal/frontier_coverage_5/group_std_mean": 0.28910828232765196,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_coverage_5/weight": 0.017297543585300446,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038735364098101853,
"signal/frontier_ece_reward/centered_abs_mean": 0.029718470200896262,
"signal/frontier_ece_reward/group_bin_occupancy": 0.66875,
"signal/frontier_ece_reward/group_std_mean": 0.03931205943226814,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003879939578473568,
"signal/frontier_ece_reward/weight": 0.1305568039417267,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003879939578473568,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07130372412502765,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.603515625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09097694158554077,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013328920677304268,
"signal/frontier_entropy_batch_reward/weight": 0.18683860898017884,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013328920677304268,
"step": 105
},
{
"adaptive_ema/accuracy_reward": 0.37519145558631123,
"adaptive_ema/brier_reward": 0.609011320573811,
"adaptive_ema/format_reward": 0.8747411738981095,
"adaptive_ema/frontier_aurc_reward": 0.11950125783945201,
"adaptive_ema/frontier_coverage_1": 0.19560125560997757,
"adaptive_ema/frontier_coverage_10": 0.19560125560997757,
"adaptive_ema/frontier_coverage_15": 0.19560125560997757,
"adaptive_ema/frontier_coverage_20": 0.19560125560997757,
"adaptive_ema/frontier_coverage_25": 0.19560125560997757,
"adaptive_ema/frontier_coverage_5": 0.19560125560997757,
"adaptive_ema/frontier_ece_reward": 0.12916943505117517,
"adaptive_ema/frontier_entropy_batch_reward": -0.22836366793030613,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.05914327949285507,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.019072819501161575,
"adaptive_weight/frontier_coverage_1": 0.017424381524324416,
"adaptive_weight/frontier_coverage_10": 0.017424381524324416,
"adaptive_weight/frontier_coverage_15": 0.017424381524324416,
"adaptive_weight/frontier_coverage_20": 0.017424381524324416,
"adaptive_weight/frontier_coverage_25": 0.017424381524324416,
"adaptive_weight/frontier_coverage_5": 0.017424381524324416,
"adaptive_weight/frontier_ece_reward": 0.13172760903835296,
"adaptive_weight/frontier_entropy_batch_reward": 0.18580999970436096,
"calibration/aurc": 0.3588262650357728,
"calibration/batch_distribution_entropy": 0.9422539276380764,
"calibration/batch_entropy_100bins": 0.6798601217797626,
"calibration/batch_entropy_10bins": 0.9422539276380764,
"calibration/batch_entropy_50bins": 0.7800342465320881,
"calibration/batch_uniqueness": 0.8849484989114356,
"calibration/buffer_distribution_entropy": 0.9623683344824145,
"calibration/buffer_entropy_100bins": 0.6978108431612698,
"calibration/buffer_entropy_10bins": 0.9623683344824145,
"calibration/buffer_entropy_50bins": 0.7969227700066659,
"calibration/confidence_entropy": 0.4530703545110395,
"calibration/coverage@0%": 0.003910836594911937,
"calibration/coverage@1%": 0.003910836594911937,
"calibration/coverage@10%": 0.05782625978473581,
"calibration/coverage@15%": 0.1914291829745597,
"calibration/coverage@20%": 0.299706457925636,
"calibration/coverage@25%": 0.3837137659001957,
"calibration/coverage@30%": 0.43528238136007824,
"calibration/coverage@5%": 0.003910836594911937,
"calibration/ece": 0.13784669508550568,
"calibration/mean_confidence": 0.40941150367161133,
"calibration/prompt_uniqueness": 0.7607474717091571,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 542.8,
"completions/max_terminated_length": 542.8,
"completions/mean_length": 191.31513671875,
"completions/mean_terminated_length": 191.31513671875,
"completions/min_length": 82.6,
"completions/min_terminated_length": 82.6,
"epoch": 0.352,
"grad_norm": 0.0009676101035438478,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 371580133.0,
"reward": 0.7830458164215088,
"reward_std": 0.0892082542181015,
"rewards/accuracy_reward": 0.46435546875,
"rewards/brier_reward": 0.7690168023109436,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003099285624921322,
"rewards/frontier_coverage_1": 0.13656647205352784,
"rewards/frontier_coverage_10": 0.13656647205352784,
"rewards/frontier_coverage_15": 0.13656647205352784,
"rewards/frontier_coverage_20": 0.13656647205352784,
"rewards/frontier_coverage_25": 0.13656647205352784,
"rewards/frontier_coverage_5": 0.13656647205352784,
"rewards/frontier_ece_reward": 0.011311782151460647,
"rewards/frontier_entropy_batch_reward": -0.05575864017009735,
"signal/accuracy_reward/centered_abs_mean": 0.110345458984375,
"signal/accuracy_reward/group_bin_occupancy": 0.17734375,
"signal/accuracy_reward/group_std_mean": 0.14654679000377654,
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0551727294921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0551727294921875,
"signal/advantage_abs_mean": 0.06846952587366104,
"signal/advantage_pre_scale_abs_mean": 0.06846952587366104,
"signal/advantage_pre_scale_std": 0.10927639603614807,
"signal/advantage_std": 0.10927639603614807,
"signal/brier_reward/centered_abs_mean": 0.16186352968215942,
"signal/brier_reward/group_bin_occupancy": 0.84140625,
"signal/brier_reward/group_std_mean": 0.20741929709911347,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00957336314022541,
"signal/brier_reward/weight": 0.05914327949285507,
"signal/brier_reward/weighted_centered_abs_mean": 0.00957336314022541,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025628196075558663,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.703125,
"signal/frontier_aurc_reward/group_std_mean": 0.004089434165507555,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.888393232249655e-05,
"signal/frontier_aurc_reward/weight": 0.019072819501161575,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.888393232249655e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2231020450592041,
"signal/frontier_coverage_1/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_1/group_std_mean": 0.28328863382339475,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_1/weight": 0.017424381524324416,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_10/centered_abs_mean": 0.2231020450592041,
"signal/frontier_coverage_10/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_10/group_std_mean": 0.28328863382339475,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_10/weight": 0.017424381524324416,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_15/centered_abs_mean": 0.2231020450592041,
"signal/frontier_coverage_15/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_15/group_std_mean": 0.28328863382339475,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_15/weight": 0.017424381524324416,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_20/centered_abs_mean": 0.2231020450592041,
"signal/frontier_coverage_20/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_20/group_std_mean": 0.28328863382339475,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_20/weight": 0.017424381524324416,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_25/centered_abs_mean": 0.2231020450592041,
"signal/frontier_coverage_25/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_25/group_std_mean": 0.28328863382339475,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_25/weight": 0.017424381524324416,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_5/centered_abs_mean": 0.2231020450592041,
"signal/frontier_coverage_5/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_5/group_std_mean": 0.28328863382339475,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_coverage_5/weight": 0.017424381524324416,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038873269222676754,
"signal/frontier_ece_reward/centered_abs_mean": 0.022969850897789003,
"signal/frontier_ece_reward/group_bin_occupancy": 0.691015625,
"signal/frontier_ece_reward/group_std_mean": 0.030405202880501747,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030254576820880176,
"signal/frontier_ece_reward/weight": 0.13172760903835296,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030254576820880176,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08273435607552529,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.580859375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1064729444682598,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01537502845749259,
"signal/frontier_entropy_batch_reward/weight": 0.18580999970436096,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01537502845749259,
"step": 110
},
{
"adaptive_ema/accuracy_reward": 0.3808226709134879,
"adaptive_ema/brier_reward": 0.6169466494056433,
"adaptive_ema/format_reward": 0.8808724066231509,
"adaptive_ema/frontier_aurc_reward": 0.11349249292544246,
"adaptive_ema/frontier_coverage_1": 0.19190069321476277,
"adaptive_ema/frontier_coverage_10": 0.19190069321476277,
"adaptive_ema/frontier_coverage_15": 0.19190069321476277,
"adaptive_ema/frontier_coverage_20": 0.19190069321476277,
"adaptive_ema/frontier_coverage_25": 0.19190069321476277,
"adaptive_ema/frontier_coverage_5": 0.19190069321476277,
"adaptive_ema/frontier_ece_reward": 0.12340020382279851,
"adaptive_ema/frontier_entropy_batch_reward": -0.21959110652164848,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.05806405767798424,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.019243112578988076,
"adaptive_weight/frontier_coverage_1": 0.017541125789284705,
"adaptive_weight/frontier_coverage_10": 0.017541125789284705,
"adaptive_weight/frontier_coverage_15": 0.017541125789284705,
"adaptive_weight/frontier_coverage_20": 0.017541125789284705,
"adaptive_weight/frontier_coverage_25": 0.017541125789284705,
"adaptive_weight/frontier_coverage_5": 0.017541125789284705,
"adaptive_weight/frontier_ece_reward": 0.13287744224071502,
"adaptive_weight/frontier_entropy_batch_reward": 0.1848686307668686,
"calibration/aurc": 0.3854856134301831,
"calibration/batch_distribution_entropy": 0.956228883497219,
"calibration/batch_entropy_100bins": 0.6788869303045917,
"calibration/batch_entropy_10bins": 0.956228883497219,
"calibration/batch_entropy_50bins": 0.7826528516805218,
"calibration/batch_uniqueness": 0.8898590087890625,
"calibration/buffer_distribution_entropy": 0.9632650275282651,
"calibration/buffer_entropy_100bins": 0.6999079540647883,
"calibration/buffer_entropy_10bins": 0.9632650275282651,
"calibration/buffer_entropy_50bins": 0.7985352523732561,
"calibration/confidence_entropy": 0.4754072231936579,
"calibration/coverage@0%": 0.004296875,
"calibration/coverage@1%": 0.004296875,
"calibration/coverage@10%": 0.004296875,
"calibration/coverage@15%": 0.06640625,
"calibration/coverage@20%": 0.1453125,
"calibration/coverage@25%": 0.24609375,
"calibration/coverage@30%": 0.3421875,
"calibration/coverage@5%": 0.004296875,
"calibration/ece": 0.14345156250000002,
"calibration/mean_confidence": 0.4806421875,
"calibration/prompt_uniqueness": 0.755712890625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 709.2,
"completions/max_terminated_length": 534.6,
"completions/mean_length": 192.16748046875,
"completions/mean_terminated_length": 191.9038848876953,
"completions/min_length": 85.2,
"completions/min_terminated_length": 85.2,
"epoch": 0.368,
"grad_norm": 0.0009233996388502419,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 388613400.0,
"reward": 0.7953165411949158,
"reward_std": 0.08573707342147827,
"rewards/accuracy_reward": 0.49169921875,
"rewards/brier_reward": 0.765993058681488,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003442125115543604,
"rewards/frontier_coverage_1": 0.11713624447584152,
"rewards/frontier_coverage_10": 0.11713624447584152,
"rewards/frontier_coverage_15": 0.11713624447584152,
"rewards/frontier_coverage_20": 0.11713624447584152,
"rewards/frontier_coverage_25": 0.11713624447584152,
"rewards/frontier_coverage_5": 0.11713624447584152,
"rewards/frontier_ece_reward": 0.0102972861379385,
"rewards/frontier_entropy_batch_reward": -0.0469327487051487,
"signal/accuracy_reward/centered_abs_mean": 0.108538818359375,
"signal/accuracy_reward/group_bin_occupancy": 0.1765625,
"signal/accuracy_reward/group_std_mean": 0.14366440922021867,
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0542694091796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0542694091796875,
"signal/advantage_abs_mean": 0.06664566695690155,
"signal/advantage_pre_scale_abs_mean": 0.06664566695690155,
"signal/advantage_pre_scale_std": 0.10758482664823532,
"signal/advantage_std": 0.10758482664823532,
"signal/brier_reward/centered_abs_mean": 0.1609862267971039,
"signal/brier_reward/group_bin_occupancy": 0.849609375,
"signal/brier_reward/group_std_mean": 0.20346853733062745,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.009347083792090417,
"signal/brier_reward/weight": 0.05806405767798424,
"signal/brier_reward/weighted_centered_abs_mean": 0.009347083792090417,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030787172727286816,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7125,
"signal/frontier_aurc_reward/group_std_mean": 0.004707864206284285,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.925965087953955e-05,
"signal/frontier_aurc_reward/weight": 0.019243112578988076,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.925965087953955e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20927395224571227,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_1/group_std_mean": 0.2662800371646881,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_1/weight": 0.017541125789284705,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_10/centered_abs_mean": 0.20927395224571227,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_10/group_std_mean": 0.2662800371646881,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_10/weight": 0.017541125789284705,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_15/centered_abs_mean": 0.20927395224571227,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_15/group_std_mean": 0.2662800371646881,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_15/weight": 0.017541125789284705,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_20/centered_abs_mean": 0.20927395224571227,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_20/group_std_mean": 0.2662800371646881,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_20/weight": 0.017541125789284705,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_25/centered_abs_mean": 0.20927395224571227,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_25/group_std_mean": 0.2662800371646881,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_25/weight": 0.017541125789284705,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_5/centered_abs_mean": 0.20927395224571227,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_5/group_std_mean": 0.2662800371646881,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_coverage_5/weight": 0.017541125789284705,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036708842031657695,
"signal/frontier_ece_reward/centered_abs_mean": 0.019026529788970948,
"signal/frontier_ece_reward/group_bin_occupancy": 0.713671875,
"signal/frontier_ece_reward/group_std_mean": 0.02441370189189911,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025277684442698957,
"signal/frontier_ece_reward/weight": 0.13287744224071502,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025277684442698957,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07778175473213196,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10077935457229614,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014381194859743119,
"signal/frontier_entropy_batch_reward/weight": 0.1848686307668686,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014381194859743119,
"step": 115
},
{
"adaptive_ema/accuracy_reward": 0.38691835656165113,
"adaptive_ema/brier_reward": 0.6242956807223472,
"adaptive_ema/format_reward": 0.886702190326939,
"adaptive_ema/frontier_aurc_reward": 0.10776341570590438,
"adaptive_ema/frontier_coverage_1": 0.18755842962724928,
"adaptive_ema/frontier_coverage_10": 0.18755842962724928,
"adaptive_ema/frontier_coverage_15": 0.18755842962724928,
"adaptive_ema/frontier_coverage_20": 0.18755842962724928,
"adaptive_ema/frontier_coverage_25": 0.187511311432699,
"adaptive_ema/frontier_coverage_5": 0.18755842962724928,
"adaptive_ema/frontier_ece_reward": 0.117820152691086,
"adaptive_ema/frontier_entropy_batch_reward": -0.21149949806803367,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.057041678577661514,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01939864121377468,
"adaptive_weight/frontier_coverage_1": 0.017663762718439103,
"adaptive_weight/frontier_coverage_10": 0.017663762718439103,
"adaptive_weight/frontier_coverage_15": 0.017663762718439103,
"adaptive_weight/frontier_coverage_20": 0.017663762718439103,
"adaptive_weight/frontier_coverage_25": 0.01766478829085827,
"adaptive_weight/frontier_coverage_5": 0.017663762718439103,
"adaptive_weight/frontier_ece_reward": 0.1339384913444519,
"adaptive_weight/frontier_entropy_batch_reward": 0.18393758237361907,
"calibration/aurc": 0.35493774799708466,
"calibration/batch_distribution_entropy": 0.9491989441612088,
"calibration/batch_entropy_100bins": 0.6622287773312955,
"calibration/batch_entropy_10bins": 0.9491989441612088,
"calibration/batch_entropy_50bins": 0.7679444207624779,
"calibration/batch_uniqueness": 0.8827301025390625,
"calibration/buffer_distribution_entropy": 0.9636707005952015,
"calibration/buffer_entropy_100bins": 0.7013720619319166,
"calibration/buffer_entropy_10bins": 0.9636707005952015,
"calibration/buffer_entropy_50bins": 0.799605255599517,
"calibration/confidence_entropy": 0.4570547443173941,
"calibration/coverage@0%": 0.005078125,
"calibration/coverage@1%": 0.005078125,
"calibration/coverage@10%": 0.123046875,
"calibration/coverage@15%": 0.201953125,
"calibration/coverage@20%": 0.251171875,
"calibration/coverage@25%": 0.29296875,
"calibration/coverage@30%": 0.360546875,
"calibration/coverage@5%": 0.028515625,
"calibration/ece": 0.134556640625,
"calibration/mean_confidence": 0.445193359375,
"calibration/prompt_uniqueness": 0.736328125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 959.6,
"completions/max_terminated_length": 539.4,
"completions/mean_length": 191.00859375,
"completions/mean_terminated_length": 190.7458068847656,
"completions/min_length": 85.6,
"completions/min_terminated_length": 85.6,
"epoch": 0.384,
"grad_norm": 0.0009219791973009706,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 405425840.0,
"reward": 0.8066781878471374,
"reward_std": 0.0858098804950714,
"rewards/accuracy_reward": 0.51943359375,
"rewards/brier_reward": 0.7786778092384339,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0029779852367937563,
"rewards/frontier_coverage_1": 0.10454831868410111,
"rewards/frontier_coverage_10": 0.10454831868410111,
"rewards/frontier_coverage_15": 0.10454831868410111,
"rewards/frontier_coverage_20": 0.10454831868410111,
"rewards/frontier_coverage_25": 0.10010432302951813,
"rewards/frontier_coverage_5": 0.10454831868410111,
"rewards/frontier_ece_reward": 0.010108662210404873,
"rewards/frontier_entropy_batch_reward": -0.05299887377768755,
"signal/accuracy_reward/centered_abs_mean": 0.103521728515625,
"signal/accuracy_reward/group_bin_occupancy": 0.1796875,
"signal/accuracy_reward/group_std_mean": 0.1436680018901825,
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0517608642578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0517608642578125,
"signal/advantage_abs_mean": 0.06451480388641358,
"signal/advantage_pre_scale_abs_mean": 0.06451480388641358,
"signal/advantage_pre_scale_std": 0.10583080053329467,
"signal/advantage_std": 0.10583080053329467,
"signal/brier_reward/centered_abs_mean": 0.15401501059532166,
"signal/brier_reward/group_bin_occupancy": 0.82265625,
"signal/brier_reward/group_std_mean": 0.19831812977790833,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008785775676369668,
"signal/brier_reward/weight": 0.057041678577661514,
"signal/brier_reward/weighted_centered_abs_mean": 0.008785775676369668,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030430202838033437,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6984375,
"signal/frontier_aurc_reward/group_std_mean": 0.004862629622220993,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.901477925362997e-05,
"signal/frontier_aurc_reward/weight": 0.01939864121377468,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.901477925362997e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20419052243232727,
"signal/frontier_coverage_1/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_1/group_std_mean": 0.2636923313140869,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_1/weight": 0.017663762718439103,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_10/centered_abs_mean": 0.20419052243232727,
"signal/frontier_coverage_10/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_10/group_std_mean": 0.2636923313140869,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_10/weight": 0.017663762718439103,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_15/centered_abs_mean": 0.20419052243232727,
"signal/frontier_coverage_15/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_15/group_std_mean": 0.2636923313140869,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_15/weight": 0.017663762718439103,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_20/centered_abs_mean": 0.20419052243232727,
"signal/frontier_coverage_20/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_20/group_std_mean": 0.2636923313140869,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_20/weight": 0.017663762718439103,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_25/centered_abs_mean": 0.1982475072145462,
"signal/frontier_coverage_25/group_bin_occupancy": 0.85703125,
"signal/frontier_coverage_25/group_std_mean": 0.25631812810897825,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003502086503431201,
"signal/frontier_coverage_25/weight": 0.01766478829085827,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003502086503431201,
"signal/frontier_coverage_5/centered_abs_mean": 0.20419052243232727,
"signal/frontier_coverage_5/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_5/group_std_mean": 0.2636923313140869,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_coverage_5/weight": 0.017663762718439103,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036070624366402624,
"signal/frontier_ece_reward/centered_abs_mean": 0.015199101902544499,
"signal/frontier_ece_reward/group_bin_occupancy": 0.739453125,
"signal/frontier_ece_reward/group_std_mean": 0.019395126774907113,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020354266278445722,
"signal/frontier_ece_reward/weight": 0.1339384913444519,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020354266278445722,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07814936712384224,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.619921875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09951084926724434,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014378698635846376,
"signal/frontier_entropy_batch_reward/weight": 0.18393758237361907,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014378698635846376,
"step": 120
},
{
"adaptive_ema/accuracy_reward": 0.39235090036761877,
"adaptive_ema/brier_reward": 0.6316182781002241,
"adaptive_ema/format_reward": 0.892235737098863,
"adaptive_ema/frontier_aurc_reward": 0.10232412924196503,
"adaptive_ema/frontier_coverage_1": 0.18421709359957616,
"adaptive_ema/frontier_coverage_10": 0.18421709359957616,
"adaptive_ema/frontier_coverage_15": 0.18421709359957616,
"adaptive_ema/frontier_coverage_20": 0.18421709359957616,
"adaptive_ema/frontier_coverage_25": 0.18374265269248752,
"adaptive_ema/frontier_coverage_5": 0.18421709359957616,
"adaptive_ema/frontier_ece_reward": 0.11247543002688846,
"adaptive_ema/frontier_entropy_batch_reward": -0.2030708460654071,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.056043879687786104,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.019556624069809913,
"adaptive_weight/frontier_coverage_1": 0.01777251176536083,
"adaptive_weight/frontier_coverage_10": 0.01777251176536083,
"adaptive_weight/frontier_coverage_15": 0.01777251176536083,
"adaptive_weight/frontier_coverage_20": 0.01777251176536083,
"adaptive_weight/frontier_coverage_25": 0.017782849073410035,
"adaptive_weight/frontier_coverage_5": 0.01777251176536083,
"adaptive_weight/frontier_ece_reward": 0.135024231672287,
"adaptive_weight/frontier_entropy_batch_reward": 0.18302985429763793,
"calibration/aurc": 0.4509882354254221,
"calibration/batch_distribution_entropy": 0.9564365076653628,
"calibration/batch_entropy_100bins": 0.6665790545329219,
"calibration/batch_entropy_10bins": 0.9564365076653628,
"calibration/batch_entropy_50bins": 0.7725879728895253,
"calibration/batch_uniqueness": 0.887788942993016,
"calibration/buffer_distribution_entropy": 0.9652585082100484,
"calibration/buffer_entropy_100bins": 0.7032827252388143,
"calibration/buffer_entropy_10bins": 0.9652585082100484,
"calibration/buffer_entropy_50bins": 0.8014792047520682,
"calibration/confidence_entropy": 0.4920077470967188,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.015234375,
"calibration/coverage@30%": 0.0869515931372549,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.16312596060684545,
"calibration/mean_confidence": 0.4642518884779939,
"calibration/prompt_uniqueness": 0.7708412783902041,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 905.4,
"completions/max_terminated_length": 519.4,
"completions/mean_length": 191.51484375,
"completions/mean_terminated_length": 191.1214630126953,
"completions/min_length": 83.2,
"completions/min_terminated_length": 83.2,
"epoch": 0.4,
"grad_norm": 0.000978235388174653,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 422423400.0,
"reward": 0.7928371787071228,
"reward_std": 0.09395273178815841,
"rewards/accuracy_reward": 0.4919921875,
"rewards/brier_reward": 0.764430582523346,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0035829836037009955,
"rewards/frontier_coverage_1": 0.11412625126540661,
"rewards/frontier_coverage_10": 0.11412625126540661,
"rewards/frontier_coverage_15": 0.11412625126540661,
"rewards/frontier_coverage_20": 0.11412625126540661,
"rewards/frontier_coverage_25": 0.10572034269571304,
"rewards/frontier_coverage_5": 0.11412625126540661,
"rewards/frontier_ece_reward": 0.007939350325614214,
"rewards/frontier_entropy_batch_reward": -0.04892009943723678,
"signal/accuracy_reward/centered_abs_mean": 0.1263916015625,
"signal/accuracy_reward/group_bin_occupancy": 0.18515625,
"signal/accuracy_reward/group_std_mean": 0.16802475452423096,
"signal/accuracy_reward/group_zero_std_frac": 0.51875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06319580078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06319580078125,
"signal/advantage_abs_mean": 0.07194100320339203,
"signal/advantage_pre_scale_abs_mean": 0.07194100320339203,
"signal/advantage_pre_scale_std": 0.11499589532613755,
"signal/advantage_std": 0.11499589532613755,
"signal/brier_reward/centered_abs_mean": 0.1625375419855118,
"signal/brier_reward/group_bin_occupancy": 0.832421875,
"signal/brier_reward/group_std_mean": 0.20592648088932036,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0091098016127944,
"signal/brier_reward/weight": 0.056043879687786104,
"signal/brier_reward/weighted_centered_abs_mean": 0.0091098016127944,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034305922221392394,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72109375,
"signal/frontier_aurc_reward/group_std_mean": 0.005304851569235325,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.709443987347185e-05,
"signal/frontier_aurc_reward/weight": 0.019556624069809913,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.709443987347185e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21515123248100282,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_1/group_std_mean": 0.276702755689621,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_1/weight": 0.01777251176536083,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_10/centered_abs_mean": 0.21515123248100282,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_10/group_std_mean": 0.276702755689621,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_10/weight": 0.01777251176536083,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_15/centered_abs_mean": 0.21515123248100282,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_15/group_std_mean": 0.276702755689621,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_15/weight": 0.01777251176536083,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_20/centered_abs_mean": 0.21515123248100282,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_20/group_std_mean": 0.276702755689621,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_20/weight": 0.01777251176536083,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_25/centered_abs_mean": 0.19758794605731964,
"signal/frontier_coverage_25/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_25/group_std_mean": 0.25453805923461914,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035135370679199697,
"signal/frontier_coverage_25/weight": 0.017782849073410035,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035135370679199697,
"signal/frontier_coverage_5/centered_abs_mean": 0.21515123248100282,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_5/group_std_mean": 0.276702755689621,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_coverage_5/weight": 0.01777251176536083,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003823632560670376,
"signal/frontier_ece_reward/centered_abs_mean": 0.013926656730473042,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7546875,
"signal/frontier_ece_reward/group_std_mean": 0.01783502697944641,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018803888699039817,
"signal/frontier_ece_reward/weight": 0.135024231672287,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018803888699039817,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07931768745183945,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.59609375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10206554159522056,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014510990865528584,
"signal/frontier_entropy_batch_reward/weight": 0.18302985429763793,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014510990865528584,
"step": 125
},
{
"adaptive_ema/accuracy_reward": 0.3971258292856597,
"adaptive_ema/brier_reward": 0.6381828016465828,
"adaptive_ema/format_reward": 0.8974846743858107,
"adaptive_ema/frontier_aurc_reward": 0.0971401174135465,
"adaptive_ema/frontier_coverage_1": 0.18090152116493946,
"adaptive_ema/frontier_coverage_10": 0.18090152116493946,
"adaptive_ema/frontier_coverage_15": 0.18090152116493946,
"adaptive_ema/frontier_coverage_20": 0.18090152116493946,
"adaptive_ema/frontier_coverage_25": 0.1800799254664386,
"adaptive_ema/frontier_coverage_5": 0.18090152116493946,
"adaptive_ema/frontier_ece_reward": 0.1073283127693054,
"adaptive_ema/frontier_entropy_batch_reward": -0.1952066626486843,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.055139760673046115,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01970335878431797,
"adaptive_weight/frontier_coverage_1": 0.017875409871339797,
"adaptive_weight/frontier_coverage_10": 0.017875409871339797,
"adaptive_weight/frontier_coverage_15": 0.017875409871339797,
"adaptive_weight/frontier_coverage_20": 0.017875409871339797,
"adaptive_weight/frontier_coverage_25": 0.017893340811133383,
"adaptive_weight/frontier_coverage_5": 0.017875409871339797,
"adaptive_weight/frontier_ece_reward": 0.1360406458377838,
"adaptive_weight/frontier_entropy_batch_reward": 0.18214584290981292,
"calibration/aurc": 0.3447294166901377,
"calibration/batch_distribution_entropy": 0.9525498814443025,
"calibration/batch_entropy_100bins": 0.6685293253297835,
"calibration/batch_entropy_10bins": 0.9525498814443025,
"calibration/batch_entropy_50bins": 0.7720460736699484,
"calibration/batch_uniqueness": 0.88769145687729,
"calibration/buffer_distribution_entropy": 0.9674000537650531,
"calibration/buffer_entropy_100bins": 0.704135251096097,
"calibration/buffer_entropy_10bins": 0.9674000537650531,
"calibration/buffer_entropy_50bins": 0.8024988319224787,
"calibration/confidence_entropy": 0.5013468191546062,
"calibration/coverage@0%": 0.004296875,
"calibration/coverage@1%": 0.004296875,
"calibration/coverage@10%": 0.022274798189823874,
"calibration/coverage@15%": 0.03594667318982388,
"calibration/coverage@20%": 0.062118548189823874,
"calibration/coverage@25%": 0.20910821306262228,
"calibration/coverage@30%": 0.32719927226027395,
"calibration/coverage@5%": 0.004296875,
"calibration/ece": 0.10505189502813113,
"calibration/mean_confidence": 0.47522064044153617,
"calibration/prompt_uniqueness": 0.7703621918899584,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1135.2,
"completions/max_terminated_length": 513.0,
"completions/mean_length": 191.45498046875,
"completions/mean_terminated_length": 190.92953491210938,
"completions/min_length": 73.4,
"completions/min_terminated_length": 73.4,
"epoch": 0.416,
"grad_norm": 0.0009625152451917529,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 439265083.0,
"reward": 0.7997597455978394,
"reward_std": 0.0898437261581421,
"rewards/accuracy_reward": 0.5013671875,
"rewards/brier_reward": 0.7667541861534118,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0030779951717704534,
"rewards/frontier_coverage_1": 0.10926591604948044,
"rewards/frontier_coverage_10": 0.10926591604948044,
"rewards/frontier_coverage_15": 0.10926591604948044,
"rewards/frontier_coverage_20": 0.10926591604948044,
"rewards/frontier_coverage_25": 0.10277043804526328,
"rewards/frontier_coverage_5": 0.10926591604948044,
"rewards/frontier_ece_reward": 0.007529846765100956,
"rewards/frontier_entropy_batch_reward": -0.03042619377374649,
"signal/accuracy_reward/centered_abs_mean": 0.1254150390625,
"signal/accuracy_reward/group_bin_occupancy": 0.18203125,
"signal/accuracy_reward/group_std_mean": 0.16262999475002288,
"signal/accuracy_reward/group_zero_std_frac": 0.54375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06270751953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06270751953125,
"signal/advantage_abs_mean": 0.0694778598845005,
"signal/advantage_pre_scale_abs_mean": 0.0694778598845005,
"signal/advantage_pre_scale_std": 0.11225654482841492,
"signal/advantage_std": 0.11225654482841492,
"signal/brier_reward/centered_abs_mean": 0.1590313732624054,
"signal/brier_reward/group_bin_occupancy": 0.83984375,
"signal/brier_reward/group_std_mean": 0.20204126536846162,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008768082037568093,
"signal/brier_reward/weight": 0.055139760673046115,
"signal/brier_reward/weighted_centered_abs_mean": 0.008768082037568093,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_bin_occupancy": 0.127734375,
"signal/format_reward/group_std_mean": 0.003866990050300956,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00288281855173409,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7328125,
"signal/frontier_aurc_reward/group_std_mean": 0.004507267288863659,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.679482419509441e-05,
"signal/frontier_aurc_reward/weight": 0.01970335878431797,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.679482419509441e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2235151559114456,
"signal/frontier_coverage_1/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_1/group_std_mean": 0.28225297331809995,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_1/weight": 0.017875409871339797,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_10/centered_abs_mean": 0.2235151559114456,
"signal/frontier_coverage_10/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_10/group_std_mean": 0.28225297331809995,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_10/weight": 0.017875409871339797,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_15/centered_abs_mean": 0.2235151559114456,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_15/group_std_mean": 0.28225297331809995,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_15/weight": 0.017875409871339797,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_20/centered_abs_mean": 0.2235151559114456,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_20/group_std_mean": 0.28225297331809995,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_20/weight": 0.017875409871339797,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_25/centered_abs_mean": 0.20612691044807435,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_25/group_std_mean": 0.26099815368652346,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036885889247059824,
"signal/frontier_coverage_25/weight": 0.017893340811133383,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036885889247059824,
"signal/frontier_coverage_5/centered_abs_mean": 0.2235151559114456,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_5/group_std_mean": 0.28225297331809995,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_coverage_5/weight": 0.017875409871339797,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003995717084035277,
"signal/frontier_ece_reward/centered_abs_mean": 0.013178033754229546,
"signal/frontier_ece_reward/group_bin_occupancy": 0.760546875,
"signal/frontier_ece_reward/group_std_mean": 0.016739048436284064,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017928671557456255,
"signal/frontier_ece_reward/weight": 0.1360406458377838,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017928671557456255,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06405057907104492,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.55703125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.0861910954117775,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011665340699255466,
"signal/frontier_entropy_batch_reward/weight": 0.18214584290981292,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011665340699255466,
"step": 130
},
{
"adaptive_ema/accuracy_reward": 0.40389062378254226,
"adaptive_ema/brier_reward": 0.6450117178895205,
"adaptive_ema/format_reward": 0.9024889395514306,
"adaptive_ema/frontier_aurc_reward": 0.09225001141692199,
"adaptive_ema/frontier_coverage_1": 0.17700410985275458,
"adaptive_ema/frontier_coverage_10": 0.17700410985275458,
"adaptive_ema/frontier_coverage_15": 0.17700410985275458,
"adaptive_ema/frontier_coverage_20": 0.17700410985275458,
"adaptive_ema/frontier_coverage_25": 0.17592252013390136,
"adaptive_ema/frontier_coverage_5": 0.17700410985275458,
"adaptive_ema/frontier_ece_reward": 0.10250125354670095,
"adaptive_ema/frontier_entropy_batch_reward": -0.18781069837660672,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.05418673381209373,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.019842178747057914,
"adaptive_weight/frontier_coverage_1": 0.017989566549658775,
"adaptive_weight/frontier_coverage_10": 0.017989566549658775,
"adaptive_weight/frontier_coverage_15": 0.017989566549658775,
"adaptive_weight/frontier_coverage_20": 0.017989566549658775,
"adaptive_weight/frontier_coverage_25": 0.018013209104537964,
"adaptive_weight/frontier_coverage_5": 0.017989566549658775,
"adaptive_weight/frontier_ece_reward": 0.13699791133403777,
"adaptive_weight/frontier_entropy_batch_reward": 0.1813121348619461,
"calibration/aurc": 0.2798047773126514,
"calibration/batch_distribution_entropy": 0.9594813797344054,
"calibration/batch_entropy_100bins": 0.6832254986653468,
"calibration/batch_entropy_10bins": 0.9594813797344054,
"calibration/batch_entropy_50bins": 0.7874470601692688,
"calibration/batch_uniqueness": 0.894732666015625,
"calibration/buffer_distribution_entropy": 0.9692746712793632,
"calibration/buffer_entropy_100bins": 0.7048346676881715,
"calibration/buffer_entropy_10bins": 0.9692746712793632,
"calibration/buffer_entropy_50bins": 0.8034103297172124,
"calibration/confidence_entropy": 0.4639365779377324,
"calibration/coverage@0%": 0.0046875,
"calibration/coverage@1%": 0.0046875,
"calibration/coverage@10%": 0.14921875,
"calibration/coverage@15%": 0.215625,
"calibration/coverage@20%": 0.282421875,
"calibration/coverage@25%": 0.351953125,
"calibration/coverage@30%": 0.516015625,
"calibration/coverage@5%": 0.05,
"calibration/ece": 0.117168359375,
"calibration/mean_confidence": 0.496183203125,
"calibration/prompt_uniqueness": 0.74169921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1146.8,
"completions/max_terminated_length": 494.2,
"completions/mean_length": 190.58916015625,
"completions/mean_terminated_length": 190.19492797851564,
"completions/min_length": 86.6,
"completions/min_terminated_length": 86.6,
"epoch": 0.432,
"grad_norm": 0.0014595247339457273,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 456231052.0,
"reward": 0.8136967062950134,
"reward_std": 0.08533284813165665,
"rewards/accuracy_reward": 0.53837890625,
"rewards/brier_reward": 0.7841731548309326,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002618259796872735,
"rewards/frontier_coverage_1": 0.10985348746180534,
"rewards/frontier_coverage_10": 0.10985348746180534,
"rewards/frontier_coverage_15": 0.10985348746180534,
"rewards/frontier_coverage_20": 0.10985348746180534,
"rewards/frontier_coverage_25": 0.10386107414960861,
"rewards/frontier_coverage_5": 0.10985348746180534,
"rewards/frontier_ece_reward": 0.00937036368995905,
"rewards/frontier_entropy_batch_reward": -0.060450931265950206,
"signal/accuracy_reward/centered_abs_mean": 0.110687255859375,
"signal/accuracy_reward/group_bin_occupancy": 0.1796875,
"signal/accuracy_reward/group_std_mean": 0.14950263351202012,
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0553436279296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0553436279296875,
"signal/advantage_abs_mean": 0.06546112969517708,
"signal/advantage_pre_scale_abs_mean": 0.06546112969517708,
"signal/advantage_pre_scale_std": 0.10559684187173843,
"signal/advantage_std": 0.10559684187173843,
"signal/brier_reward/centered_abs_mean": 0.15093303322792054,
"signal/brier_reward/group_bin_occupancy": 0.82421875,
"signal/brier_reward/group_std_mean": 0.1923585206270218,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008179661072790622,
"signal/brier_reward/weight": 0.05418673381209373,
"signal/brier_reward/weighted_centered_abs_mean": 0.008179661072790622,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002759553166106343,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.719921875,
"signal/frontier_aurc_reward/group_std_mean": 0.004286598227918148,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.476091318996623e-05,
"signal/frontier_aurc_reward/weight": 0.019842178747057914,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.476091318996623e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20907386541366577,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_1/group_std_mean": 0.2689059257507324,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_1/weight": 0.017989566549658775,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_10/centered_abs_mean": 0.20907386541366577,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_10/group_std_mean": 0.2689059257507324,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_10/weight": 0.017989566549658775,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_15/centered_abs_mean": 0.20907386541366577,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_15/group_std_mean": 0.2689059257507324,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_15/weight": 0.017989566549658775,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_20/centered_abs_mean": 0.20907386541366577,
"signal/frontier_coverage_20/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_20/group_std_mean": 0.2689059257507324,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_20/weight": 0.017989566549658775,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_25/centered_abs_mean": 0.19255250692367554,
"signal/frontier_coverage_25/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_25/group_std_mean": 0.2472353994846344,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003468307899311185,
"signal/frontier_coverage_25/weight": 0.018013209104537964,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003468307899311185,
"signal/frontier_coverage_5/centered_abs_mean": 0.20907386541366577,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_5/group_std_mean": 0.2689059257507324,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_coverage_5/weight": 0.017989566549658775,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037610134109854697,
"signal/frontier_ece_reward/centered_abs_mean": 0.01300532352179289,
"signal/frontier_ece_reward/group_bin_occupancy": 0.746875,
"signal/frontier_ece_reward/group_std_mean": 0.016403328627347946,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017816227162256836,
"signal/frontier_ece_reward/weight": 0.13699791133403777,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017816227162256836,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09048456139862537,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.55234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.11550155356526375,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01640178356319666,
"signal/frontier_entropy_batch_reward/weight": 0.1813121348619461,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01640178356319666,
"step": 135
},
{
"adaptive_ema/accuracy_reward": 0.40901087552425236,
"adaptive_ema/brier_reward": 0.6516210816848998,
"adaptive_ema/format_reward": 0.907253467880264,
"adaptive_ema/frontier_aurc_reward": 0.08757274590309962,
"adaptive_ema/frontier_coverage_1": 0.17444973559060695,
"adaptive_ema/frontier_coverage_10": 0.17444973559060695,
"adaptive_ema/frontier_coverage_15": 0.17444973559060695,
"adaptive_ema/frontier_coverage_20": 0.17444973559060695,
"adaptive_ema/frontier_coverage_25": 0.1731865165107393,
"adaptive_ema/frontier_coverage_5": 0.17444973559060695,
"adaptive_ema/frontier_ece_reward": 0.09789230437137501,
"adaptive_ema/frontier_entropy_batch_reward": -0.18140109903372353,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.05326752662658692,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.01997806802392006,
"adaptive_weight/frontier_coverage_1": 0.01807584725320339,
"adaptive_weight/frontier_coverage_10": 0.01807584725320339,
"adaptive_weight/frontier_coverage_15": 0.01807584725320339,
"adaptive_weight/frontier_coverage_20": 0.01807584725320339,
"adaptive_weight/frontier_coverage_25": 0.01810350678861141,
"adaptive_weight/frontier_coverage_5": 0.01807584725320339,
"adaptive_weight/frontier_ece_reward": 0.13793377280235292,
"adaptive_weight/frontier_entropy_batch_reward": 0.18063787817955018,
"calibration/aurc": 0.2962632427860098,
"calibration/batch_distribution_entropy": 0.9570370964821228,
"calibration/batch_entropy_100bins": 0.6842356187694494,
"calibration/batch_entropy_10bins": 0.9570370964821228,
"calibration/batch_entropy_50bins": 0.7894500856200997,
"calibration/batch_uniqueness": 0.8935943603515625,
"calibration/buffer_distribution_entropy": 0.9709373362653174,
"calibration/buffer_entropy_100bins": 0.7063763335396203,
"calibration/buffer_entropy_10bins": 0.9709373362653174,
"calibration/buffer_entropy_50bins": 0.8050762497775716,
"calibration/confidence_entropy": 0.4888197990694086,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.07578125,
"calibration/coverage@15%": 0.109375,
"calibration/coverage@20%": 0.181640625,
"calibration/coverage@25%": 0.293359375,
"calibration/coverage@30%": 0.449609375,
"calibration/coverage@5%": 0.0265625,
"calibration/ece": 0.144036328125,
"calibration/mean_confidence": 0.529510546875,
"calibration/prompt_uniqueness": 0.774169921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 769.4,
"completions/max_terminated_length": 656.2,
"completions/mean_length": 197.3052734375,
"completions/mean_terminated_length": 196.52201232910156,
"completions/min_length": 89.8,
"completions/min_terminated_length": 89.8,
"epoch": 0.448,
"grad_norm": 0.0010398230515420437,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 473204258.0,
"reward": 0.802183473110199,
"reward_std": 0.08431367427110673,
"rewards/accuracy_reward": 0.50546875,
"rewards/brier_reward": 0.7795440316200256,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0031814999878406526,
"rewards/frontier_coverage_1": 0.12466348260641098,
"rewards/frontier_coverage_10": 0.12466348260641098,
"rewards/frontier_coverage_15": 0.12466348260641098,
"rewards/frontier_coverage_20": 0.12466348260641098,
"rewards/frontier_coverage_25": 0.12040752172470093,
"rewards/frontier_coverage_5": 0.12466348260641098,
"rewards/frontier_ece_reward": 0.008025220409035683,
"rewards/frontier_entropy_batch_reward": -0.03533042259514332,
"signal/accuracy_reward/centered_abs_mean": 0.112109375,
"signal/accuracy_reward/group_bin_occupancy": 0.17734375,
"signal/accuracy_reward/group_std_mean": 0.14760722517967223,
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0560546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0560546875,
"signal/advantage_abs_mean": 0.06522256284952163,
"signal/advantage_pre_scale_abs_mean": 0.06522256284952163,
"signal/advantage_pre_scale_std": 0.10748258531093598,
"signal/advantage_std": 0.10748258531093598,
"signal/brier_reward/centered_abs_mean": 0.15731483101844787,
"signal/brier_reward/group_bin_occupancy": 0.836328125,
"signal/brier_reward/group_std_mean": 0.19994543492794037,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008379863202571869,
"signal/brier_reward/weight": 0.05326752662658692,
"signal/brier_reward/weighted_centered_abs_mean": 0.008379863202571869,
"signal/format_reward/centered_abs_mean": 0.00101318359375,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0017052460461854935,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000506591796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000506591796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003091309033334255,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.702734375,
"signal/frontier_aurc_reward/group_std_mean": 0.0049549748189747335,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.173901856527663e-05,
"signal/frontier_aurc_reward/weight": 0.01997806802392006,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.173901856527663e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21405775845050812,
"signal/frontier_coverage_1/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_1/group_std_mean": 0.27364026606082914,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_1/weight": 0.01807584725320339,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_10/centered_abs_mean": 0.21405775845050812,
"signal/frontier_coverage_10/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_10/group_std_mean": 0.27364026606082914,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_10/weight": 0.01807584725320339,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_15/centered_abs_mean": 0.21405775845050812,
"signal/frontier_coverage_15/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_15/group_std_mean": 0.27364026606082914,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_15/weight": 0.01807584725320339,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_20/centered_abs_mean": 0.21405775845050812,
"signal/frontier_coverage_20/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_20/group_std_mean": 0.27364026606082914,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_20/weight": 0.01807584725320339,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_25/centered_abs_mean": 0.19044365584850312,
"signal/frontier_coverage_25/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_25/group_std_mean": 0.24385970830917358,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034478874877095222,
"signal/frontier_coverage_25/weight": 0.01810350678861141,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034478874877095222,
"signal/frontier_coverage_5/centered_abs_mean": 0.21405775845050812,
"signal/frontier_coverage_5/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_5/group_std_mean": 0.27364026606082914,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_coverage_5/weight": 0.01807584725320339,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038695004768669605,
"signal/frontier_ece_reward/centered_abs_mean": 0.01259579136967659,
"signal/frontier_ece_reward/group_bin_occupancy": 0.763671875,
"signal/frontier_ece_reward/group_std_mean": 0.015973252430558204,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017373360227793456,
"signal/frontier_ece_reward/weight": 0.13793377280235292,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017373360227793456,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0651637777686119,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.58046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.08384880423545837,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011776900757104159,
"signal/frontier_entropy_batch_reward/weight": 0.18063787817955018,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011776900757104159,
"step": 140
},
{
"adaptive_ema/accuracy_reward": 0.41331912438572094,
"adaptive_ema/brier_reward": 0.6573604978214818,
"adaptive_ema/format_reward": 0.9117732413333242,
"adaptive_ema/frontier_aurc_reward": 0.08311509842616054,
"adaptive_ema/frontier_coverage_1": 0.17176355954378422,
"adaptive_ema/frontier_coverage_10": 0.17176355954378422,
"adaptive_ema/frontier_coverage_15": 0.17176355954378422,
"adaptive_ema/frontier_coverage_20": 0.17176355954378422,
"adaptive_ema/frontier_coverage_25": 0.1701984387803478,
"adaptive_ema/frontier_coverage_5": 0.17176355954378422,
"adaptive_ema/frontier_ece_reward": 0.0934477810133228,
"adaptive_ema/frontier_entropy_batch_reward": -0.17472138935692122,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.05246995091438293,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.020106296986341476,
"adaptive_weight/frontier_coverage_1": 0.018162321671843527,
"adaptive_weight/frontier_coverage_10": 0.018162321671843527,
"adaptive_weight/frontier_coverage_15": 0.018162321671843527,
"adaptive_weight/frontier_coverage_20": 0.018162321671843527,
"adaptive_weight/frontier_coverage_25": 0.0181966457515955,
"adaptive_weight/frontier_coverage_5": 0.018162321671843527,
"adaptive_weight/frontier_ece_reward": 0.1388248085975647,
"adaptive_weight/frontier_entropy_batch_reward": 0.1798906832933426,
"calibration/aurc": 0.4353696800721244,
"calibration/batch_distribution_entropy": 0.9603827125826234,
"calibration/batch_entropy_100bins": 0.6827533765788634,
"calibration/batch_entropy_10bins": 0.9603827125826234,
"calibration/batch_entropy_50bins": 0.7879613603432448,
"calibration/batch_uniqueness": 0.8935943603515625,
"calibration/buffer_distribution_entropy": 0.9722251449187385,
"calibration/buffer_entropy_100bins": 0.707843130215015,
"calibration/buffer_entropy_10bins": 0.9722251449187385,
"calibration/buffer_entropy_50bins": 0.8066064119008413,
"calibration/confidence_entropy": 0.48914448804111155,
"calibration/coverage@0%": 0.00234375,
"calibration/coverage@1%": 0.00234375,
"calibration/coverage@10%": 0.00234375,
"calibration/coverage@15%": 0.02890625,
"calibration/coverage@20%": 0.038671875,
"calibration/coverage@25%": 0.140234375,
"calibration/coverage@30%": 0.254296875,
"calibration/coverage@5%": 0.00234375,
"calibration/ece": 0.14485286458333332,
"calibration/mean_confidence": 0.4620638020833333,
"calibration/prompt_uniqueness": 0.758447265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 886.6,
"completions/max_terminated_length": 469.4,
"completions/mean_length": 198.75224609375,
"completions/mean_terminated_length": 198.3603759765625,
"completions/min_length": 85.8,
"completions/min_terminated_length": 85.8,
"epoch": 0.464,
"grad_norm": 0.0009978722082450986,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 490410297.0,
"reward": 0.7703938245773315,
"reward_std": 0.08229008316993713,
"rewards/accuracy_reward": 0.45380859375,
"rewards/brier_reward": 0.7603809714317322,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0039552615489810705,
"rewards/frontier_coverage_1": 0.14377658516168595,
"rewards/frontier_coverage_10": 0.14377658516168595,
"rewards/frontier_coverage_15": 0.14377658516168595,
"rewards/frontier_coverage_20": 0.14377658516168595,
"rewards/frontier_coverage_25": 0.12820575162768363,
"rewards/frontier_coverage_5": 0.14377658516168595,
"rewards/frontier_ece_reward": 0.006863876525312662,
"rewards/frontier_entropy_batch_reward": -0.07018293291330338,
"signal/accuracy_reward/centered_abs_mean": 0.094793701171875,
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
"signal/accuracy_reward/group_std_mean": 0.13010418564081191,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0473968505859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0473968505859375,
"signal/advantage_abs_mean": 0.06305078566074371,
"signal/advantage_pre_scale_abs_mean": 0.06305078566074371,
"signal/advantage_pre_scale_std": 0.10240222066640854,
"signal/advantage_std": 0.10240222066640854,
"signal/brier_reward/centered_abs_mean": 0.15986269414424897,
"signal/brier_reward/group_bin_occupancy": 0.845703125,
"signal/brier_reward/group_std_mean": 0.20299112200737,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008388944528996944,
"signal/brier_reward/weight": 0.05246995091438293,
"signal/brier_reward/weighted_centered_abs_mean": 0.008388944528996944,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034380458761006593,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.691015625,
"signal/frontier_aurc_reward/group_std_mean": 0.005427456274628639,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.912858807481826e-05,
"signal/frontier_aurc_reward/weight": 0.020106296986341476,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.912858807481826e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20949412882328033,
"signal/frontier_coverage_1/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_1/group_std_mean": 0.26894415616989137,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_1/weight": 0.018162321671843527,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_10/centered_abs_mean": 0.20949412882328033,
"signal/frontier_coverage_10/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_10/group_std_mean": 0.26894415616989137,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_10/weight": 0.018162321671843527,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_15/centered_abs_mean": 0.20949412882328033,
"signal/frontier_coverage_15/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_15/group_std_mean": 0.26894415616989137,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_15/weight": 0.018162321671843527,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_20/centered_abs_mean": 0.20949412882328033,
"signal/frontier_coverage_20/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_20/group_std_mean": 0.26894415616989137,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_20/weight": 0.018162321671843527,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_25/centered_abs_mean": 0.1845701038837433,
"signal/frontier_coverage_25/group_bin_occupancy": 0.871875,
"signal/frontier_coverage_25/group_std_mean": 0.2373584806919098,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00335861025378108,
"signal/frontier_coverage_25/weight": 0.0181966457515955,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00335861025378108,
"signal/frontier_coverage_5/centered_abs_mean": 0.20949412882328033,
"signal/frontier_coverage_5/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_5/group_std_mean": 0.26894415616989137,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_coverage_5/weight": 0.018162321671843527,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003804934723302722,
"signal/frontier_ece_reward/centered_abs_mean": 0.012350363284349441,
"signal/frontier_ece_reward/group_bin_occupancy": 0.775390625,
"signal/frontier_ece_reward/group_std_mean": 0.015785422176122665,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001714510377496481,
"signal/frontier_ece_reward/weight": 0.1388248085975647,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001714510377496481,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10670888125896454,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.540234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.13525095582008362,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019193359836935996,
"signal/frontier_entropy_batch_reward/weight": 0.1798906832933426,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019193359836935996,
"step": 145
},
{
"adaptive_ema/accuracy_reward": 0.4158360108171112,
"adaptive_ema/brier_reward": 0.662379968983462,
"adaptive_ema/format_reward": 0.9160760948369102,
"adaptive_ema/frontier_aurc_reward": 0.07885424522936033,
"adaptive_ema/frontier_coverage_1": 0.170121992197436,
"adaptive_ema/frontier_coverage_10": 0.170121992197436,
"adaptive_ema/frontier_coverage_15": 0.170121992197436,
"adaptive_ema/frontier_coverage_20": 0.170121992197436,
"adaptive_ema/frontier_coverage_25": 0.1679290612428575,
"adaptive_ema/frontier_coverage_5": 0.170121992197436,
"adaptive_ema/frontier_ece_reward": 0.0892171161257757,
"adaptive_ema/frontier_entropy_batch_reward": -0.16944830561177496,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.051763904839754106,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02022419050335884,
"adaptive_weight/frontier_coverage_1": 0.018220360577106475,
"adaptive_weight/frontier_coverage_10": 0.018220360577106475,
"adaptive_weight/frontier_coverage_15": 0.018220360577106475,
"adaptive_weight/frontier_coverage_20": 0.018220360577106475,
"adaptive_weight/frontier_coverage_25": 0.018268508464097978,
"adaptive_weight/frontier_coverage_5": 0.018220360577106475,
"adaptive_weight/frontier_ece_reward": 0.13964154720306396,
"adaptive_weight/frontier_entropy_batch_reward": 0.17930004000663757,
"calibration/aurc": 0.30436075115770805,
"calibration/batch_distribution_entropy": 0.9680437886073019,
"calibration/batch_entropy_100bins": 0.6929306685782016,
"calibration/batch_entropy_10bins": 0.9680437886073019,
"calibration/batch_entropy_50bins": 0.7983824179166977,
"calibration/batch_uniqueness": 0.8958013507292947,
"calibration/buffer_distribution_entropy": 0.9731618760742794,
"calibration/buffer_entropy_100bins": 0.7091474549089483,
"calibration/buffer_entropy_10bins": 0.9731618760742794,
"calibration/buffer_entropy_50bins": 0.8079997262518148,
"calibration/confidence_entropy": 0.46800119826295006,
"calibration/coverage@0%": 0.00625,
"calibration/coverage@1%": 0.00625,
"calibration/coverage@10%": 0.042578125,
"calibration/coverage@15%": 0.13203125,
"calibration/coverage@20%": 0.33750458659491195,
"calibration/coverage@25%": 0.43634647137964777,
"calibration/coverage@30%": 0.4824532167318982,
"calibration/coverage@5%": 0.0140625,
"calibration/ece": 0.15203426183217936,
"calibration/mean_confidence": 0.478522489635531,
"calibration/prompt_uniqueness": 0.7592241459905047,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 926.2,
"completions/max_terminated_length": 550.8,
"completions/mean_length": 199.7640625,
"completions/mean_terminated_length": 199.37288513183594,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.48,
"grad_norm": 0.0010686474852263927,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 507503913.0,
"reward": 0.7970785617828369,
"reward_std": 0.0882499486207962,
"rewards/accuracy_reward": 0.5107421875,
"rewards/brier_reward": 0.7666718006134033,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0033504603896290066,
"rewards/frontier_coverage_1": 0.11392012536525727,
"rewards/frontier_coverage_10": 0.11392012536525727,
"rewards/frontier_coverage_15": 0.11392012536525727,
"rewards/frontier_coverage_20": 0.11392012536525727,
"rewards/frontier_coverage_25": 0.10631907731294632,
"rewards/frontier_coverage_5": 0.11392012536525727,
"rewards/frontier_ece_reward": 0.007587762642651796,
"rewards/frontier_entropy_batch_reward": -0.06213836595416069,
"signal/accuracy_reward/centered_abs_mean": 0.11937255859375,
"signal/accuracy_reward/group_bin_occupancy": 0.182421875,
"signal/accuracy_reward/group_std_mean": 0.15820908844470977,
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059686279296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.059686279296875,
"signal/advantage_abs_mean": 0.067903071641922,
"signal/advantage_pre_scale_abs_mean": 0.067903071641922,
"signal/advantage_pre_scale_std": 0.10903761386871338,
"signal/advantage_std": 0.10903761386871338,
"signal/brier_reward/centered_abs_mean": 0.1605544239282608,
"signal/brier_reward/group_bin_occupancy": 0.81796875,
"signal/brier_reward/group_std_mean": 0.204882350564003,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008311640471220016,
"signal/brier_reward/weight": 0.051763904839754106,
"signal/brier_reward/weighted_centered_abs_mean": 0.008311640471220016,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032434612046927215,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375,
"signal/frontier_aurc_reward/group_std_mean": 0.005121718998998404,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.559158791787922e-05,
"signal/frontier_aurc_reward/weight": 0.02022419050335884,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.559158791787922e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22300857603549956,
"signal/frontier_coverage_1/group_bin_occupancy": 0.842578125,
"signal/frontier_coverage_1/group_std_mean": 0.2856623888015747,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_1/weight": 0.018220360577106475,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_10/centered_abs_mean": 0.22300857603549956,
"signal/frontier_coverage_10/group_bin_occupancy": 0.842578125,
"signal/frontier_coverage_10/group_std_mean": 0.2856623888015747,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_10/weight": 0.018220360577106475,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_15/centered_abs_mean": 0.22300857603549956,
"signal/frontier_coverage_15/group_bin_occupancy": 0.842578125,
"signal/frontier_coverage_15/group_std_mean": 0.2856623888015747,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_15/weight": 0.018220360577106475,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_20/centered_abs_mean": 0.22300857603549956,
"signal/frontier_coverage_20/group_bin_occupancy": 0.842578125,
"signal/frontier_coverage_20/group_std_mean": 0.2856623888015747,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_20/weight": 0.018220360577106475,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_25/centered_abs_mean": 0.19893572628498077,
"signal/frontier_coverage_25/group_bin_occupancy": 0.837109375,
"signal/frontier_coverage_25/group_std_mean": 0.25566576421260834,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003634166205301881,
"signal/frontier_coverage_25/weight": 0.018268508464097978,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003634166205301881,
"signal/frontier_coverage_5/centered_abs_mean": 0.22300857603549956,
"signal/frontier_coverage_5/group_bin_occupancy": 0.842578125,
"signal/frontier_coverage_5/group_std_mean": 0.2856623888015747,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_coverage_5/weight": 0.018220360577106475,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004063255805522204,
"signal/frontier_ece_reward/centered_abs_mean": 0.01263709794729948,
"signal/frontier_ece_reward/group_bin_occupancy": 0.763671875,
"signal/frontier_ece_reward/group_std_mean": 0.016002984531223775,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017646895488724113,
"signal/frontier_ece_reward/weight": 0.13964154720306396,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017646895488724113,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08762557096779347,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.11139658465981483,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015710269659757616,
"signal/frontier_entropy_batch_reward/weight": 0.17930004000663757,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015710269659757616,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.5294038132117689,
"eval_calibration/batch_distribution_entropy": 0.9063057403122545,
"eval_calibration/batch_entropy_100bins": 0.565109525945698,
"eval_calibration/batch_entropy_10bins": 0.9063057403122545,
"eval_calibration/batch_entropy_50bins": 0.6576108922402973,
"eval_calibration/batch_uniqueness": 0.82421875,
"eval_calibration/buffer_distribution_entropy": 0.9738875723935155,
"eval_calibration/buffer_entropy_100bins": 0.7100046469801263,
"eval_calibration/buffer_entropy_10bins": 0.9738875723935155,
"eval_calibration/buffer_entropy_50bins": 0.8088772762097054,
"eval_calibration/confidence_entropy": 0.455809067666528,
"eval_calibration/coverage@0%": 0.015625,
"eval_calibration/coverage@1%": 0.015625,
"eval_calibration/coverage@10%": 0.015625,
"eval_calibration/coverage@15%": 0.015625,
"eval_calibration/coverage@20%": 0.0625,
"eval_calibration/coverage@25%": 0.0859375,
"eval_calibration/coverage@30%": 0.09375,
"eval_calibration/coverage@5%": 0.015625,
"eval_calibration/ece": 0.191640625,
"eval_calibration/mean_confidence": 0.423828125,
"eval_calibration/prompt_uniqueness": 0.82421875,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 342.25,
"eval_completions/max_terminated_length": 342.25,
"eval_completions/mean_length": 201.73026657104492,
"eval_completions/mean_terminated_length": 201.73026657104492,
"eval_completions/min_length": 102.25,
"eval_completions/min_terminated_length": 102.25,
"eval_loss": 0.0,
"eval_num_tokens": 507503913.0,
"eval_reward": 0.7298808097839355,
"eval_reward_std": 0.2223692536354065,
"eval_rewards/accuracy_reward": 0.4140625,
"eval_rewards/brier_reward": 0.7809019684791565,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0038445236277766526,
"eval_rewards/frontier_coverage_1": 0.1827796958386898,
"eval_rewards/frontier_coverage_10": 0.1827796958386898,
"eval_rewards/frontier_coverage_15": 0.1827796958386898,
"eval_rewards/frontier_coverage_20": 0.1827796958386898,
"eval_rewards/frontier_coverage_25": 0.14920702949166298,
"eval_rewards/frontier_coverage_5": 0.1827796958386898,
"eval_rewards/frontier_ece_reward": 0.00868905265815556,
"eval_rewards/frontier_entropy_batch_reward": -0.2114410400390625,
"eval_runtime": 19.1534,
"eval_samples_per_second": 26.105,
"eval_signal/accuracy_reward/centered_abs_mean": 0.465087890625,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4892200380563736,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2325439453125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2325439453125,
"eval_signal/advantage_abs_mean": 0.1968984156847,
"eval_signal/advantage_pre_scale_abs_mean": 0.1968984156847,
"eval_signal/advantage_pre_scale_std": 0.22023877874016762,
"eval_signal/advantage_std": 0.22023877874016762,
"eval_signal/brier_reward/centered_abs_mean": 0.1967528983950615,
"eval_signal/brier_reward/group_bin_occupancy": 0.875,
"eval_signal/brier_reward/group_std_mean": 0.25220654532313347,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010119047947227955,
"eval_signal/brier_reward/weight": 0.05143023654818535,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.010119047947227955,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004891375545412302,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.734375,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008528004633262753,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.914291149470955e-05,
"eval_signal/frontier_aurc_reward/weight": 0.020268922671675682,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.914291149470955e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3524972200393677,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4465137869119644,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_1/weight": 0.018254097551107407,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3524972200393677,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4465137869119644,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_10/weight": 0.018254097551107407,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3524972200393677,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4465137869119644,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_15/weight": 0.018254097551107407,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3524972200393677,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4465137869119644,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_20/weight": 0.018254097551107407,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.29581238329410553,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9140625,
"eval_signal/frontier_coverage_25/group_std_mean": 0.37883658707141876,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005414659972302616,
"eval_signal/frontier_coverage_25/weight": 0.0183043722063303,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005414659972302616,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3524972200393677,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4465137869119644,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_coverage_5/weight": 0.018254097551107407,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006434518378227949,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.015601360471919179,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9375,
"eval_signal/frontier_ece_reward/group_std_mean": 0.019876172300428152,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021834074286744,
"eval_signal/frontier_ece_reward/weight": 0.13994981348514557,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021834074286744,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3039870262145996,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5390625,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3767779543995857,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05443682614713907,
"eval_signal/frontier_entropy_batch_reward/weight": 0.1790761500597,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05443682614713907,
"eval_steps_per_second": 0.209,
"step": 150
},
{
"adaptive_ema/accuracy_reward": 0.4216893837907655,
"adaptive_ema/brier_reward": 0.6677967837015515,
"adaptive_ema/format_reward": 0.9201758669871953,
"adaptive_ema/frontier_aurc_reward": 0.07483301245139518,
"adaptive_ema/frontier_coverage_1": 0.16652975308326617,
"adaptive_ema/frontier_coverage_10": 0.16652975308326617,
"adaptive_ema/frontier_coverage_15": 0.16652975308326617,
"adaptive_ema/frontier_coverage_20": 0.16652975308326617,
"adaptive_ema/frontier_coverage_25": 0.1637826024910254,
"adaptive_ema/frontier_coverage_5": 0.16652975308326617,
"adaptive_ema/frontier_ece_reward": 0.08521242073109674,
"adaptive_ema/frontier_entropy_batch_reward": -0.1634185454626772,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.05099123567342758,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02033553533256054,
"adaptive_weight/frontier_coverage_1": 0.018320006504654884,
"adaptive_weight/frontier_coverage_10": 0.018320006504654884,
"adaptive_weight/frontier_coverage_15": 0.018320006504654884,
"adaptive_weight/frontier_coverage_20": 0.018320006504654884,
"adaptive_weight/frontier_coverage_25": 0.01838039085268974,
"adaptive_weight/frontier_coverage_5": 0.018320006504654884,
"adaptive_weight/frontier_ece_reward": 0.14041475653648378,
"adaptive_weight/frontier_entropy_batch_reward": 0.17857804894447327,
"calibration/aurc": 0.41690815238153345,
"calibration/batch_distribution_entropy": 0.9677657457156361,
"calibration/batch_entropy_100bins": 0.6856321179224288,
"calibration/batch_entropy_10bins": 0.9677657457156361,
"calibration/batch_entropy_50bins": 0.7908586413950005,
"calibration/batch_uniqueness": 0.8968597412109375,
"calibration/buffer_distribution_entropy": 0.9741904402212562,
"calibration/buffer_entropy_100bins": 0.7102141481794881,
"calibration/buffer_entropy_10bins": 0.9741904402212562,
"calibration/buffer_entropy_50bins": 0.8090575669865515,
"calibration/confidence_entropy": 0.4929868712618986,
"calibration/coverage@0%": 0.001171875,
"calibration/coverage@1%": 0.001171875,
"calibration/coverage@10%": 0.05390625,
"calibration/coverage@15%": 0.116015625,
"calibration/coverage@20%": 0.139453125,
"calibration/coverage@25%": 0.17890625,
"calibration/coverage@30%": 0.24375,
"calibration/coverage@5%": 0.01875,
"calibration/ece": 0.14474409101562496,
"calibration/mean_confidence": 0.505406690234375,
"calibration/prompt_uniqueness": 0.767236328125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 926.4,
"completions/max_terminated_length": 488.4,
"completions/mean_length": 203.50927734375,
"completions/mean_terminated_length": 203.1182373046875,
"completions/min_length": 93.2,
"completions/min_terminated_length": 93.2,
"epoch": 0.496,
"grad_norm": 0.0009186447714455426,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 524895688.0,
"reward": 0.8125284075736999,
"reward_std": 0.08393937200307847,
"rewards/accuracy_reward": 0.53896484375,
"rewards/brier_reward": 0.7696381568908691,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003218067158013582,
"rewards/frontier_coverage_1": 0.08215026259422302,
"rewards/frontier_coverage_10": 0.08215026259422302,
"rewards/frontier_coverage_15": 0.08215026259422302,
"rewards/frontier_coverage_20": 0.08215026259422302,
"rewards/frontier_coverage_25": 0.06435679569840431,
"rewards/frontier_coverage_5": 0.08215026259422302,
"rewards/frontier_ece_reward": 0.006951355841010809,
"rewards/frontier_entropy_batch_reward": -0.03228703960776329,
"signal/accuracy_reward/centered_abs_mean": 0.106158447265625,
"signal/accuracy_reward/group_bin_occupancy": 0.17421875,
"signal/accuracy_reward/group_std_mean": 0.14007158279418946,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0530792236328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0530792236328125,
"signal/advantage_abs_mean": 0.06463491767644883,
"signal/advantage_pre_scale_abs_mean": 0.06463491767644883,
"signal/advantage_pre_scale_std": 0.10683177411556244,
"signal/advantage_std": 0.10683177411556244,
"signal/brier_reward/centered_abs_mean": 0.1546708345413208,
"signal/brier_reward/group_bin_occupancy": 0.834765625,
"signal/brier_reward/group_std_mean": 0.19565778970718384,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0078872368671,
"signal/brier_reward/weight": 0.05099123567342758,
"signal/brier_reward/weighted_centered_abs_mean": 0.0078872368671,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003242273861542344,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.712109375,
"signal/frontier_aurc_reward/group_std_mean": 0.005140113364905119,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.592829740839078e-05,
"signal/frontier_aurc_reward/weight": 0.02033553533256054,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.592829740839078e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19634953141212463,
"signal/frontier_coverage_1/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_1/group_std_mean": 0.2511405676603317,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_1/weight": 0.018320006504654884,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_10/centered_abs_mean": 0.19634953141212463,
"signal/frontier_coverage_10/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_10/group_std_mean": 0.2511405676603317,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_10/weight": 0.018320006504654884,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_15/centered_abs_mean": 0.19634953141212463,
"signal/frontier_coverage_15/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_15/group_std_mean": 0.2511405676603317,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_15/weight": 0.018320006504654884,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_20/centered_abs_mean": 0.19634953141212463,
"signal/frontier_coverage_20/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_20/group_std_mean": 0.2511405676603317,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_20/weight": 0.018320006504654884,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_25/centered_abs_mean": 0.158232381939888,
"signal/frontier_coverage_25/group_bin_occupancy": 0.84140625,
"signal/frontier_coverage_25/group_std_mean": 0.20365700125694275,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002908079931512475,
"signal/frontier_coverage_25/weight": 0.01838039085268974,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002908079931512475,
"signal/frontier_coverage_5/centered_abs_mean": 0.19634953141212463,
"signal/frontier_coverage_5/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_5/group_std_mean": 0.2511405676603317,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_coverage_5/weight": 0.018320006504654884,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035970427095890047,
"signal/frontier_ece_reward/centered_abs_mean": 0.011543996259570121,
"signal/frontier_ece_reward/group_bin_occupancy": 0.771875,
"signal/frontier_ece_reward/group_std_mean": 0.014726097695529461,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016208942281082273,
"signal/frontier_ece_reward/weight": 0.14041475653648378,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016208942281082273,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06659752577543258,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.590234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.08772371038794517,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011892184615135193,
"signal/frontier_entropy_batch_reward/weight": 0.17857804894447327,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011892184615135193,
"step": 155
},
{
"adaptive_ema/accuracy_reward": 0.42775765798098997,
"adaptive_ema/brier_reward": 0.6731288846155358,
"adaptive_ema/format_reward": 0.9240622182023328,
"adaptive_ema/frontier_aurc_reward": 0.0710203393077371,
"adaptive_ema/frontier_coverage_1": 0.1625095130501547,
"adaptive_ema/frontier_coverage_10": 0.1625095130501547,
"adaptive_ema/frontier_coverage_15": 0.1625095130501547,
"adaptive_ema/frontier_coverage_20": 0.16245797037544615,
"adaptive_ema/frontier_coverage_25": 0.1589048029890912,
"adaptive_ema/frontier_coverage_5": 0.1625095130501547,
"adaptive_ema/frontier_ece_reward": 0.08139468145157765,
"adaptive_ema/frontier_entropy_batch_reward": -0.1572926740027641,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.050226838886737825,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.020441357418894766,
"adaptive_weight/frontier_coverage_1": 0.018428217619657516,
"adaptive_weight/frontier_coverage_10": 0.018428217619657516,
"adaptive_weight/frontier_coverage_15": 0.018428217619657516,
"adaptive_weight/frontier_coverage_20": 0.018429351970553398,
"adaptive_weight/frontier_coverage_25": 0.018507539853453638,
"adaptive_weight/frontier_coverage_5": 0.018428217619657516,
"adaptive_weight/frontier_ece_reward": 0.1411527931690216,
"adaptive_weight/frontier_entropy_batch_reward": 0.17782924175262452,
"calibration/aurc": 0.3364546318335065,
"calibration/batch_distribution_entropy": 0.9636604192444294,
"calibration/batch_entropy_100bins": 0.6906049356265781,
"calibration/batch_entropy_10bins": 0.9636604192444294,
"calibration/batch_entropy_50bins": 0.7936943544550781,
"calibration/batch_uniqueness": 0.8961584303052467,
"calibration/buffer_distribution_entropy": 0.9748793949875173,
"calibration/buffer_entropy_100bins": 0.7105535912341152,
"calibration/buffer_entropy_10bins": 0.9748793949875173,
"calibration/buffer_entropy_50bins": 0.8093978297658875,
"calibration/confidence_entropy": 0.49873162920203085,
"calibration/coverage@0%": 0.010546875,
"calibration/coverage@1%": 0.010546875,
"calibration/coverage@10%": 0.126953125,
"calibration/coverage@15%": 0.203515625,
"calibration/coverage@20%": 0.3008393468688845,
"calibration/coverage@25%": 0.3864450831702544,
"calibration/coverage@30%": 0.45917089652641874,
"calibration/coverage@5%": 0.055859375,
"calibration/ece": 0.13006195507815585,
"calibration/mean_confidence": 0.4916528810275193,
"calibration/prompt_uniqueness": 0.77803577816077,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 927.6,
"completions/max_terminated_length": 495.2,
"completions/mean_length": 202.1953125,
"completions/mean_terminated_length": 201.80372009277343,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.512,
"grad_norm": 0.0009540948085486889,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 542111832.0,
"reward": 0.8123380541801453,
"reward_std": 0.08555371910333634,
"rewards/accuracy_reward": 0.53466796875,
"rewards/brier_reward": 0.7846086502075196,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.002941929781809449,
"rewards/frontier_coverage_1": 0.10255831480026245,
"rewards/frontier_coverage_10": 0.10255831480026245,
"rewards/frontier_coverage_15": 0.10255831480026245,
"rewards/frontier_coverage_20": 0.10150663703680038,
"rewards/frontier_coverage_25": 0.07860263586044311,
"rewards/frontier_coverage_5": 0.10255831480026245,
"rewards/frontier_ece_reward": 0.007728977501392365,
"rewards/frontier_entropy_batch_reward": -0.035049394518136975,
"signal/accuracy_reward/centered_abs_mean": 0.111737060546875,
"signal/accuracy_reward/group_bin_occupancy": 0.178515625,
"signal/accuracy_reward/group_std_mean": 0.14923475980758666,
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0558685302734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0558685302734375,
"signal/advantage_abs_mean": 0.0649673268198967,
"signal/advantage_pre_scale_abs_mean": 0.0649673268198967,
"signal/advantage_pre_scale_std": 0.10899066478013993,
"signal/advantage_std": 0.10899066478013993,
"signal/brier_reward/centered_abs_mean": 0.14757270514965057,
"signal/brier_reward/group_bin_occupancy": 0.832421875,
"signal/brier_reward/group_std_mean": 0.19025132954120635,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.007413102779537439,
"signal/brier_reward/weight": 0.050226838886737825,
"signal/brier_reward/weighted_centered_abs_mean": 0.007413102779537439,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032049688510596753,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6890625,
"signal/frontier_aurc_reward/group_std_mean": 0.005401439126580953,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.551606275024824e-05,
"signal/frontier_aurc_reward/weight": 0.020441357418894766,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.551606275024824e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18966628313064576,
"signal/frontier_coverage_1/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_1/group_std_mean": 0.2492189884185791,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003495160723105073,
"signal/frontier_coverage_1/weight": 0.018428217619657516,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003495160723105073,
"signal/frontier_coverage_10/centered_abs_mean": 0.18966628313064576,
"signal/frontier_coverage_10/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_10/group_std_mean": 0.2492189884185791,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003495160723105073,
"signal/frontier_coverage_10/weight": 0.018428217619657516,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003495160723105073,
"signal/frontier_coverage_15/centered_abs_mean": 0.18966628313064576,
"signal/frontier_coverage_15/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_15/group_std_mean": 0.2492189884185791,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003495160723105073,
"signal/frontier_coverage_15/weight": 0.018428217619657516,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003495160723105073,
"signal/frontier_coverage_20/centered_abs_mean": 0.1885404407978058,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8625,
"signal/frontier_coverage_20/group_std_mean": 0.2477384924888611,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034746643155813216,
"signal/frontier_coverage_20/weight": 0.018429351970553398,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034746643155813216,
"signal/frontier_coverage_25/centered_abs_mean": 0.1385777235031128,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_25/group_std_mean": 0.1832002341747284,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002564464882016182,
"signal/frontier_coverage_25/weight": 0.018507539853453638,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002564464882016182,
"signal/frontier_coverage_5/centered_abs_mean": 0.18966628313064576,
"signal/frontier_coverage_5/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_5/group_std_mean": 0.2492189884185791,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003495160723105073,
"signal/frontier_coverage_5/weight": 0.018428217619657516,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003495160723105073,
"signal/frontier_ece_reward/centered_abs_mean": 0.011046069860458373,
"signal/frontier_ece_reward/group_bin_occupancy": 0.776953125,
"signal/frontier_ece_reward/group_std_mean": 0.014163880608975887,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015591410920023918,
"signal/frontier_ece_reward/weight": 0.1411527931690216,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015591410920023918,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06716678887605668,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.61640625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.08615255355834961,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011946601420640945,
"signal/frontier_entropy_batch_reward/weight": 0.17782924175262452,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011946601420640945,
"step": 160
},
{
"adaptive_ema/accuracy_reward": 0.4325205851242789,
"adaptive_ema/brier_reward": 0.6788407846972966,
"adaptive_ema/format_reward": 0.9277763152493026,
"adaptive_ema/frontier_aurc_reward": 0.06739938548177218,
"adaptive_ema/frontier_coverage_1": 0.1604166561415282,
"adaptive_ema/frontier_coverage_10": 0.1604166561415282,
"adaptive_ema/frontier_coverage_15": 0.1604166561415282,
"adaptive_ema/frontier_coverage_20": 0.16033269429267963,
"adaptive_ema/frontier_coverage_25": 0.1555716756944678,
"adaptive_ema/frontier_coverage_5": 0.1604166561415282,
"adaptive_ema/frontier_ece_reward": 0.07779708224033675,
"adaptive_ema/frontier_entropy_batch_reward": -0.15159694629667597,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.049429801851511,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.0205545574426651,
"adaptive_weight/frontier_coverage_1": 0.01850444935262203,
"adaptive_weight/frontier_coverage_10": 0.01850444935262203,
"adaptive_weight/frontier_coverage_15": 0.01850444935262203,
"adaptive_weight/frontier_coverage_20": 0.018506300821900367,
"adaptive_weight/frontier_coverage_25": 0.01861123591661453,
"adaptive_weight/frontier_coverage_5": 0.01850444935262203,
"adaptive_weight/frontier_ece_reward": 0.14193709790706635,
"adaptive_weight/frontier_entropy_batch_reward": 0.1772432029247284,
"calibration/aurc": 0.22409343936084275,
"calibration/batch_distribution_entropy": 0.9689437318170097,
"calibration/batch_entropy_100bins": 0.6919291075244673,
"calibration/batch_entropy_10bins": 0.9689437318170097,
"calibration/batch_entropy_50bins": 0.7959131008887249,
"calibration/batch_uniqueness": 0.8983062744140625,
"calibration/buffer_distribution_entropy": 0.9754367199459588,
"calibration/buffer_entropy_100bins": 0.7101681510025485,
"calibration/buffer_entropy_10bins": 0.9754367199459588,
"calibration/buffer_entropy_50bins": 0.8092072534086444,
"calibration/confidence_entropy": 0.47571831432345046,
"calibration/coverage@0%": 0.01328125,
"calibration/coverage@1%": 0.01328125,
"calibration/coverage@10%": 0.209375,
"calibration/coverage@15%": 0.387109375,
"calibration/coverage@20%": 0.5140625,
"calibration/coverage@25%": 0.62734375,
"calibration/coverage@30%": 0.703125,
"calibration/coverage@5%": 0.084375,
"calibration/ece": 0.12249218750000002,
"calibration/mean_confidence": 0.5035312499999999,
"calibration/prompt_uniqueness": 0.755419921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 471.0,
"completions/max_terminated_length": 471.0,
"completions/mean_length": 204.42216796875,
"completions/mean_terminated_length": 204.42216796875,
"completions/min_length": 92.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.528,
"grad_norm": 0.0020615458488464355,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 559234651.0,
"reward": 0.809805941581726,
"reward_std": 0.08527890592813492,
"rewards/accuracy_reward": 0.5328125,
"rewards/brier_reward": 0.7901050925254822,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002666196506470442,
"rewards/frontier_coverage_1": 0.11995811313390732,
"rewards/frontier_coverage_10": 0.11995811313390732,
"rewards/frontier_coverage_15": 0.11995811313390732,
"rewards/frontier_coverage_20": 0.11858220845460891,
"rewards/frontier_coverage_25": 0.08949183002114296,
"rewards/frontier_coverage_5": 0.11995811313390732,
"rewards/frontier_ece_reward": 0.007980644144117833,
"rewards/frontier_entropy_batch_reward": -0.05378929451107979,
"signal/accuracy_reward/centered_abs_mean": 0.1185791015625,
"signal/accuracy_reward/group_bin_occupancy": 0.178125,
"signal/accuracy_reward/group_std_mean": 0.15306617319583893,
"signal/accuracy_reward/group_zero_std_frac": 0.575,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05928955078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05928955078125,
"signal/advantage_abs_mean": 0.06676331087946892,
"signal/advantage_pre_scale_abs_mean": 0.06676331087946892,
"signal/advantage_pre_scale_std": 0.1093256339430809,
"signal/advantage_std": 0.1093256339430809,
"signal/brier_reward/centered_abs_mean": 0.14486051201820374,
"signal/brier_reward/group_bin_occupancy": 0.81328125,
"signal/brier_reward/group_std_mean": 0.18578538298606873,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.007160227932035923,
"signal/brier_reward/weight": 0.049429801851511,
"signal/brier_reward/weighted_centered_abs_mean": 0.007160227932035923,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002835435047745705,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.689453125,
"signal/frontier_aurc_reward/group_std_mean": 0.004600296774879098,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.827866843901575e-05,
"signal/frontier_aurc_reward/weight": 0.0205545574426651,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.827866843901575e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2021091252565384,
"signal/frontier_coverage_1/group_bin_occupancy": 0.84921875,
"signal/frontier_coverage_1/group_std_mean": 0.2615320235490799,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037399885710328817,
"signal/frontier_coverage_1/weight": 0.01850444935262203,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037399885710328817,
"signal/frontier_coverage_10/centered_abs_mean": 0.2021091252565384,
"signal/frontier_coverage_10/group_bin_occupancy": 0.84921875,
"signal/frontier_coverage_10/group_std_mean": 0.2615320235490799,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037399885710328817,
"signal/frontier_coverage_10/weight": 0.01850444935262203,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037399885710328817,
"signal/frontier_coverage_15/centered_abs_mean": 0.2021091252565384,
"signal/frontier_coverage_15/group_bin_occupancy": 0.84921875,
"signal/frontier_coverage_15/group_std_mean": 0.2615320235490799,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037399885710328817,
"signal/frontier_coverage_15/weight": 0.01850444935262203,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037399885710328817,
"signal/frontier_coverage_20/centered_abs_mean": 0.1976030260324478,
"signal/frontier_coverage_20/group_bin_occupancy": 0.84609375,
"signal/frontier_coverage_20/group_std_mean": 0.2558705747127533,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036569551099091767,
"signal/frontier_coverage_20/weight": 0.018506300821900367,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036569551099091767,
"signal/frontier_coverage_25/centered_abs_mean": 0.1361584484577179,
"signal/frontier_coverage_25/group_bin_occupancy": 0.84140625,
"signal/frontier_coverage_25/group_std_mean": 0.17853571772575377,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002534055197611451,
"signal/frontier_coverage_25/weight": 0.01861123591661453,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002534055197611451,
"signal/frontier_coverage_5/centered_abs_mean": 0.2021091252565384,
"signal/frontier_coverage_5/group_bin_occupancy": 0.84921875,
"signal/frontier_coverage_5/group_std_mean": 0.2615320235490799,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037399885710328817,
"signal/frontier_coverage_5/weight": 0.01850444935262203,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037399885710328817,
"signal/frontier_ece_reward/centered_abs_mean": 0.01067428793758154,
"signal/frontier_ece_reward/group_bin_occupancy": 0.76875,
"signal/frontier_ece_reward/group_std_mean": 0.01351653877645731,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015151004772633314,
"signal/frontier_ece_reward/weight": 0.14193709790706635,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015151004772633314,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07872896939516068,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.567578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10164814293384553,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013953628391027451,
"signal/frontier_entropy_batch_reward/weight": 0.1772432029247284,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013953628391027451,
"step": 165
},
{
"adaptive_ema/accuracy_reward": 0.4383067569373906,
"adaptive_ema/brier_reward": 0.6838993677795594,
"adaptive_ema/format_reward": 0.931300554007238,
"adaptive_ema/frontier_aurc_reward": 0.06395956977261932,
"adaptive_ema/frontier_coverage_1": 0.15742099640782753,
"adaptive_ema/frontier_coverage_10": 0.15742099640782753,
"adaptive_ema/frontier_coverage_15": 0.15742099640782753,
"adaptive_ema/frontier_coverage_20": 0.157249171840029,
"adaptive_ema/frontier_coverage_25": 0.15150339712214747,
"adaptive_ema/frontier_coverage_5": 0.15742099640782753,
"adaptive_ema/frontier_ece_reward": 0.07434109182543788,
"adaptive_ema/frontier_entropy_batch_reward": -0.14720343264181412,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04869266897439957,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.020647920295596124,
"adaptive_weight/frontier_coverage_1": 0.01858627498149872,
"adaptive_weight/frontier_coverage_10": 0.01858627498149872,
"adaptive_weight/frontier_coverage_15": 0.01858627498149872,
"adaptive_weight/frontier_coverage_20": 0.018590064719319344,
"adaptive_weight/frontier_coverage_25": 0.018716811016201974,
"adaptive_weight/frontier_coverage_5": 0.01858627498149872,
"adaptive_weight/frontier_ece_reward": 0.1425902009010315,
"adaptive_weight/frontier_entropy_batch_reward": 0.17671723365783693,
"calibration/aurc": 0.25509466056467095,
"calibration/batch_distribution_entropy": 0.9725157625345979,
"calibration/batch_entropy_100bins": 0.7019710805310249,
"calibration/batch_entropy_10bins": 0.9725157625345979,
"calibration/batch_entropy_50bins": 0.8013937346369447,
"calibration/batch_uniqueness": 0.8974367551701217,
"calibration/buffer_distribution_entropy": 0.9759876725866707,
"calibration/buffer_entropy_100bins": 0.7097671052229481,
"calibration/buffer_entropy_10bins": 0.9759876725866707,
"calibration/buffer_entropy_50bins": 0.808892690946708,
"calibration/confidence_entropy": 0.450354472679105,
"calibration/coverage@0%": 0.011728687622309197,
"calibration/coverage@1%": 0.011728687622309197,
"calibration/coverage@10%": 0.08204118762230919,
"calibration/coverage@15%": 0.2794092465753425,
"calibration/coverage@20%": 0.4244251467710372,
"calibration/coverage@25%": 0.5561261619373777,
"calibration/coverage@30%": 0.6436651479941291,
"calibration/coverage@5%": 0.011728687622309197,
"calibration/ece": 0.11073061399217224,
"calibration/mean_confidence": 0.4943483213062622,
"calibration/prompt_uniqueness": 0.7487230505332987,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1145.2,
"completions/max_terminated_length": 651.2,
"completions/mean_length": 208.3013671875,
"completions/mean_terminated_length": 207.65260314941406,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.544,
"grad_norm": 0.0009282738319598138,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 576531241.0,
"reward": 0.8175342917442322,
"reward_std": 0.09144499897956848,
"rewards/accuracy_reward": 0.55869140625,
"rewards/brier_reward": 0.7725163578987122,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.002964147459715605,
"rewards/frontier_coverage_1": 0.08193773776292801,
"rewards/frontier_coverage_10": 0.08193773776292801,
"rewards/frontier_coverage_15": 0.08193773776292801,
"rewards/frontier_coverage_20": 0.08030623197555542,
"rewards/frontier_coverage_25": 0.05987264439463615,
"rewards/frontier_coverage_5": 0.08193773776292801,
"rewards/frontier_ece_reward": 0.006552364397794008,
"rewards/frontier_entropy_batch_reward": -0.05003713071346283,
"signal/accuracy_reward/centered_abs_mean": 0.129425048828125,
"signal/accuracy_reward/group_bin_occupancy": 0.1859375,
"signal/accuracy_reward/group_std_mean": 0.1712253749370575,
"signal/accuracy_reward/group_zero_std_frac": 0.5125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0647125244140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0647125244140625,
"signal/advantage_abs_mean": 0.06956043541431427,
"signal/advantage_pre_scale_abs_mean": 0.06956043541431427,
"signal/advantage_pre_scale_std": 0.11212797164916992,
"signal/advantage_std": 0.11212797164916992,
"signal/brier_reward/centered_abs_mean": 0.15993968546390533,
"signal/brier_reward/group_bin_occupancy": 0.828515625,
"signal/brier_reward/group_std_mean": 0.20379654169082642,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.007788042444735766,
"signal/brier_reward/weight": 0.04869266897439957,
"signal/brier_reward/weighted_centered_abs_mean": 0.007788042444735766,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003263408271595836,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.670703125,
"signal/frontier_aurc_reward/group_std_mean": 0.005496641155332327,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.738151423633099e-05,
"signal/frontier_aurc_reward/weight": 0.020647920295596124,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.738151423633099e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2161277115345001,
"signal/frontier_coverage_1/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_1/group_std_mean": 0.2762247920036316,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004016902018338442,
"signal/frontier_coverage_1/weight": 0.01858627498149872,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004016902018338442,
"signal/frontier_coverage_10/centered_abs_mean": 0.2161277115345001,
"signal/frontier_coverage_10/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_10/group_std_mean": 0.2762247920036316,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004016902018338442,
"signal/frontier_coverage_10/weight": 0.01858627498149872,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004016902018338442,
"signal/frontier_coverage_15/centered_abs_mean": 0.2161277115345001,
"signal/frontier_coverage_15/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_15/group_std_mean": 0.2762247920036316,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004016902018338442,
"signal/frontier_coverage_15/weight": 0.01858627498149872,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004016902018338442,
"signal/frontier_coverage_20/centered_abs_mean": 0.2121141731739044,
"signal/frontier_coverage_20/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_20/group_std_mean": 0.27119354605674745,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039430524222552775,
"signal/frontier_coverage_20/weight": 0.018590064719319344,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039430524222552775,
"signal/frontier_coverage_25/centered_abs_mean": 0.14031263887882234,
"signal/frontier_coverage_25/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_25/group_std_mean": 0.18075886964797974,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026261318940669297,
"signal/frontier_coverage_25/weight": 0.018716811016201974,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026261318940669297,
"signal/frontier_coverage_5/centered_abs_mean": 0.2161277115345001,
"signal/frontier_coverage_5/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_5/group_std_mean": 0.2762247920036316,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004016902018338442,
"signal/frontier_coverage_5/weight": 0.01858627498149872,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004016902018338442,
"signal/frontier_ece_reward/centered_abs_mean": 0.011203336343169213,
"signal/frontier_ece_reward/group_bin_occupancy": 0.78828125,
"signal/frontier_ece_reward/group_std_mean": 0.014098120294511319,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015974577516317367,
"signal/frontier_ece_reward/weight": 0.1425902009010315,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015974577516317367,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07690173387527466,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.54921875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09812586307525635,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013595831673592329,
"signal/frontier_entropy_batch_reward/weight": 0.17671723365783693,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013595831673592329,
"step": 170
},
{
"adaptive_ema/accuracy_reward": 0.4436072018924998,
"adaptive_ema/brier_reward": 0.6883438409870493,
"adaptive_ema/format_reward": 0.9346330694496237,
"adaptive_ema/frontier_aurc_reward": 0.06068736120478294,
"adaptive_ema/frontier_coverage_1": 0.15437627981998278,
"adaptive_ema/frontier_coverage_10": 0.15437627981998278,
"adaptive_ema/frontier_coverage_15": 0.15437627981998278,
"adaptive_ema/frontier_coverage_20": 0.15415214953497286,
"adaptive_ema/frontier_coverage_25": 0.14754116154013136,
"adaptive_ema/frontier_coverage_5": 0.15437627981998278,
"adaptive_ema/frontier_ece_reward": 0.07102756252520259,
"adaptive_ema/frontier_entropy_batch_reward": -0.14206769968694552,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.048053061962127684,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.020739533379673956,
"adaptive_weight/frontier_coverage_1": 0.018670928850769996,
"adaptive_weight/frontier_coverage_10": 0.018670928850769996,
"adaptive_weight/frontier_coverage_15": 0.018670928850769996,
"adaptive_weight/frontier_coverage_20": 0.018675877153873442,
"adaptive_weight/frontier_coverage_25": 0.018821846693754196,
"adaptive_weight/frontier_coverage_5": 0.018670928850769996,
"adaptive_weight/frontier_ece_reward": 0.14323482811450958,
"adaptive_weight/frontier_entropy_batch_reward": 0.17609113454818726,
"calibration/aurc": 0.2730237709859521,
"calibration/batch_distribution_entropy": 0.9713218020962746,
"calibration/batch_entropy_100bins": 0.7193840849266884,
"calibration/batch_entropy_10bins": 0.9713218020962746,
"calibration/batch_entropy_50bins": 0.8190414280746543,
"calibration/batch_uniqueness": 0.9050258267993678,
"calibration/buffer_distribution_entropy": 0.9767955435357575,
"calibration/buffer_entropy_100bins": 0.710864333067301,
"calibration/buffer_entropy_10bins": 0.9767955435357575,
"calibration/buffer_entropy_50bins": 0.8100096000160144,
"calibration/confidence_entropy": 0.46547111782143746,
"calibration/coverage@0%": 0.005475629892367906,
"calibration/coverage@1%": 0.005475629892367906,
"calibration/coverage@10%": 0.19846272627201564,
"calibration/coverage@15%": 0.3146312377690802,
"calibration/coverage@20%": 0.40383057118395305,
"calibration/coverage@25%": 0.48442163038160474,
"calibration/coverage@30%": 0.5344919581702544,
"calibration/coverage@5%": 0.1410225048923679,
"calibration/ece": 0.15978001365786038,
"calibration/mean_confidence": 0.44530827829419434,
"calibration/prompt_uniqueness": 0.7652296496975806,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1149.0,
"completions/max_terminated_length": 646.4,
"completions/mean_length": 208.6453125,
"completions/mean_terminated_length": 208.12649230957032,
"completions/min_length": 94.2,
"completions/min_terminated_length": 94.2,
"epoch": 0.56,
"grad_norm": 0.0010633780620992184,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 593489177.0,
"reward": 0.8009234666824341,
"reward_std": 0.08025546967983246,
"rewards/accuracy_reward": 0.52197265625,
"rewards/brier_reward": 0.7800888299942017,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0029034676495939492,
"rewards/frontier_coverage_1": 0.11806583553552627,
"rewards/frontier_coverage_10": 0.11806583553552627,
"rewards/frontier_coverage_15": 0.11806583553552627,
"rewards/frontier_coverage_20": 0.11465947777032852,
"rewards/frontier_coverage_25": 0.08335476815700531,
"rewards/frontier_coverage_5": 0.11806583553552627,
"rewards/frontier_ece_reward": 0.007106996979564429,
"rewards/frontier_entropy_batch_reward": -0.0612617876380682,
"signal/accuracy_reward/centered_abs_mean": 0.096990966796875,
"signal/accuracy_reward/group_bin_occupancy": 0.176953125,
"signal/accuracy_reward/group_std_mean": 0.1360134780406952,
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0484954833984375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0484954833984375,
"signal/advantage_abs_mean": 0.05966150388121605,
"signal/advantage_pre_scale_abs_mean": 0.05966150388121605,
"signal/advantage_pre_scale_std": 0.10060604065656661,
"signal/advantage_std": 0.10060604065656661,
"signal/brier_reward/centered_abs_mean": 0.15158625245094298,
"signal/brier_reward/group_bin_occupancy": 0.81875,
"signal/brier_reward/group_std_mean": 0.19490756690502167,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0072855155915021895,
"signal/brier_reward/weight": 0.048053061962127684,
"signal/brier_reward/weighted_centered_abs_mean": 0.0072855155915021895,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_bin_occupancy": 0.127734375,
"signal/format_reward/group_std_mean": 0.0038669900968670845,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003106844192370772,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.68359375,
"signal/frontier_aurc_reward/group_std_mean": 0.005209229234606027,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.443956008297391e-05,
"signal/frontier_aurc_reward/weight": 0.020739533379673956,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.443956008297391e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20348803400993348,
"signal/frontier_coverage_1/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_1/group_std_mean": 0.2632997930049896,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037990906741470097,
"signal/frontier_coverage_1/weight": 0.018670928850769996,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037990906741470097,
"signal/frontier_coverage_10/centered_abs_mean": 0.20348803400993348,
"signal/frontier_coverage_10/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_10/group_std_mean": 0.2632997930049896,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037990906741470097,
"signal/frontier_coverage_10/weight": 0.018670928850769996,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037990906741470097,
"signal/frontier_coverage_15/centered_abs_mean": 0.20348803400993348,
"signal/frontier_coverage_15/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_15/group_std_mean": 0.2632997930049896,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037990906741470097,
"signal/frontier_coverage_15/weight": 0.018670928850769996,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037990906741470097,
"signal/frontier_coverage_20/centered_abs_mean": 0.1926076591014862,
"signal/frontier_coverage_20/group_bin_occupancy": 0.848828125,
"signal/frontier_coverage_20/group_std_mean": 0.24944129288196565,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035968903452157975,
"signal/frontier_coverage_20/weight": 0.018675877153873442,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035968903452157975,
"signal/frontier_coverage_25/centered_abs_mean": 0.1296244353055954,
"signal/frontier_coverage_25/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_25/group_std_mean": 0.16898023784160615,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024395577609539034,
"signal/frontier_coverage_25/weight": 0.018821846693754196,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024395577609539034,
"signal/frontier_coverage_5/centered_abs_mean": 0.20348803400993348,
"signal/frontier_coverage_5/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_5/group_std_mean": 0.2632997930049896,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037990906741470097,
"signal/frontier_coverage_5/weight": 0.018670928850769996,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037990906741470097,
"signal/frontier_ece_reward/centered_abs_mean": 0.010423007607460021,
"signal/frontier_ece_reward/group_bin_occupancy": 0.778515625,
"signal/frontier_ece_reward/group_std_mean": 0.013230705820024013,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001492859236896038,
"signal/frontier_ece_reward/weight": 0.14323482811450958,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001492859236896038,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08325772732496262,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.55859375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10612156391143798,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014658915996551513,
"signal/frontier_entropy_batch_reward/weight": 0.17609113454818726,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014658915996551513,
"step": 175
},
{
"adaptive_ema/accuracy_reward": 0.44623034630379654,
"adaptive_ema/brier_reward": 0.6926405104345207,
"adaptive_ema/format_reward": 0.937817631708912,
"adaptive_ema/frontier_aurc_reward": 0.05754573219740727,
"adaptive_ema/frontier_coverage_1": 0.15310669916635733,
"adaptive_ema/frontier_coverage_10": 0.15310669916635733,
"adaptive_ema/frontier_coverage_15": 0.15310669916635733,
"adaptive_ema/frontier_coverage_20": 0.1524453964077967,
"adaptive_ema/frontier_coverage_25": 0.14439836258841363,
"adaptive_ema/frontier_coverage_5": 0.15310669916635733,
"adaptive_ema/frontier_ece_reward": 0.06787384748708213,
"adaptive_ema/frontier_entropy_batch_reward": -0.13817989528525737,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04743674695491791,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.020829180255532265,
"adaptive_weight/frontier_coverage_1": 0.018717186525464058,
"adaptive_weight/frontier_coverage_10": 0.018717186525464058,
"adaptive_weight/frontier_coverage_15": 0.018717186525464058,
"adaptive_weight/frontier_coverage_20": 0.0187318030744791,
"adaptive_weight/frontier_coverage_25": 0.018909652531147004,
"adaptive_weight/frontier_coverage_5": 0.018717186525464058,
"adaptive_weight/frontier_ece_reward": 0.14386117160320283,
"adaptive_weight/frontier_entropy_batch_reward": 0.17566269338130952,
"calibration/aurc": 0.3368074206827616,
"calibration/batch_distribution_entropy": 0.9719911527106417,
"calibration/batch_entropy_100bins": 0.7169152619774707,
"calibration/batch_entropy_10bins": 0.9719911527106417,
"calibration/batch_entropy_50bins": 0.815783581914066,
"calibration/batch_uniqueness": 0.9021641926185449,
"calibration/buffer_distribution_entropy": 0.9770301781043716,
"calibration/buffer_entropy_100bins": 0.7121975254743294,
"calibration/buffer_entropy_10bins": 0.9770301781043716,
"calibration/buffer_entropy_50bins": 0.8112773883011256,
"calibration/confidence_entropy": 0.47810382196565415,
"calibration/coverage@0%": 0.003126528864970646,
"calibration/coverage@1%": 0.003126528864970646,
"calibration/coverage@10%": 0.05937652886497065,
"calibration/coverage@15%": 0.10941398605675148,
"calibration/coverage@20%": 0.2547303082191781,
"calibration/coverage@25%": 0.3168396832191781,
"calibration/coverage@30%": 0.39585906922700587,
"calibration/coverage@5%": 0.003126528864970646,
"calibration/ece": 0.09531413935390712,
"calibration/mean_confidence": 0.4680983178378737,
"calibration/prompt_uniqueness": 0.7562092506178459,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 753.2,
"completions/max_terminated_length": 538.4,
"completions/mean_length": 210.11298828125,
"completions/mean_terminated_length": 209.98328247070313,
"completions/min_length": 86.6,
"completions/min_terminated_length": 86.6,
"epoch": 0.576,
"grad_norm": 0.00579429604113102,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 610827358.0,
"reward": 0.7945305824279785,
"reward_std": 0.07392666339874268,
"rewards/accuracy_reward": 0.50703125,
"rewards/brier_reward": 0.775688111782074,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0032747992780059578,
"rewards/frontier_coverage_1": 0.12220384031534196,
"rewards/frontier_coverage_10": 0.12220384031534196,
"rewards/frontier_coverage_15": 0.12220384031534196,
"rewards/frontier_coverage_20": 0.11271904930472373,
"rewards/frontier_coverage_25": 0.07997815757989883,
"rewards/frontier_coverage_5": 0.12220384031534196,
"rewards/frontier_ece_reward": 0.00646611051633954,
"rewards/frontier_entropy_batch_reward": -0.053216959536075595,
"signal/accuracy_reward/centered_abs_mean": 0.08941650390625,
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
"signal/accuracy_reward/group_std_mean": 0.123899807035923,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044708251953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044708251953125,
"signal/advantage_abs_mean": 0.05529655441641808,
"signal/advantage_pre_scale_abs_mean": 0.05529655441641808,
"signal/advantage_pre_scale_std": 0.0951567530632019,
"signal/advantage_std": 0.0951567530632019,
"signal/brier_reward/centered_abs_mean": 0.1453737199306488,
"signal/brier_reward/group_bin_occupancy": 0.8125,
"signal/brier_reward/group_std_mean": 0.18699900507926942,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006896778661757708,
"signal/brier_reward/weight": 0.04743674695491791,
"signal/brier_reward/weighted_centered_abs_mean": 0.006896778661757708,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003002939047291875,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.691015625,
"signal/frontier_aurc_reward/group_std_mean": 0.004798801522701979,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.253647443372756e-05,
"signal/frontier_aurc_reward/weight": 0.020829180255532265,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.253647443372756e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19401188492774962,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_1/group_std_mean": 0.2502962052822113,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036313798744231464,
"signal/frontier_coverage_1/weight": 0.018717186525464058,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036313798744231464,
"signal/frontier_coverage_10/centered_abs_mean": 0.19401188492774962,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_10/group_std_mean": 0.2502962052822113,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036313798744231464,
"signal/frontier_coverage_10/weight": 0.018717186525464058,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036313798744231464,
"signal/frontier_coverage_15/centered_abs_mean": 0.19401188492774962,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_15/group_std_mean": 0.2502962052822113,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036313798744231464,
"signal/frontier_coverage_15/weight": 0.018717186525464058,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036313798744231464,
"signal/frontier_coverage_20/centered_abs_mean": 0.18169912993907927,
"signal/frontier_coverage_20/group_bin_occupancy": 0.84765625,
"signal/frontier_coverage_20/group_std_mean": 0.23473725318908692,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034035469871014357,
"signal/frontier_coverage_20/weight": 0.0187318030744791,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034035469871014357,
"signal/frontier_coverage_25/centered_abs_mean": 0.12429940104484558,
"signal/frontier_coverage_25/group_bin_occupancy": 0.837890625,
"signal/frontier_coverage_25/group_std_mean": 0.16169273257255554,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002350442623719573,
"signal/frontier_coverage_25/weight": 0.018909652531147004,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002350442623719573,
"signal/frontier_coverage_5/centered_abs_mean": 0.19401188492774962,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_5/group_std_mean": 0.2502962052822113,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036313798744231464,
"signal/frontier_coverage_5/weight": 0.018717186525464058,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036313798744231464,
"signal/frontier_ece_reward/centered_abs_mean": 0.009726777486503125,
"signal/frontier_ece_reward/group_bin_occupancy": 0.75625,
"signal/frontier_ece_reward/group_std_mean": 0.012411239556968212,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013992791762575507,
"signal/frontier_ece_reward/weight": 0.14386117160320283,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013992791762575507,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07289832383394242,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.576953125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09288608580827713,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012805469892919063,
"signal/frontier_entropy_batch_reward/weight": 0.17566269338130952,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012805469892919063,
"step": 180
},
{
"adaptive_ema/accuracy_reward": 0.4493698127133222,
"adaptive_ema/brier_reward": 0.6970354247086294,
"adaptive_ema/format_reward": 0.9408555370667686,
"adaptive_ema/frontier_aurc_reward": 0.05457625015351719,
"adaptive_ema/frontier_coverage_1": 0.15179881337716356,
"adaptive_ema/frontier_coverage_10": 0.15179881337716356,
"adaptive_ema/frontier_coverage_15": 0.15179881337716356,
"adaptive_ema/frontier_coverage_20": 0.15061851856890654,
"adaptive_ema/frontier_coverage_25": 0.1413812478201582,
"adaptive_ema/frontier_coverage_5": 0.15179881337716356,
"adaptive_ema/frontier_ece_reward": 0.0648879138418748,
"adaptive_ema/frontier_entropy_batch_reward": -0.13473297325994987,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04680159762501716,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02091408334672451,
"adaptive_weight/frontier_coverage_1": 0.018763383477926256,
"adaptive_weight/frontier_coverage_10": 0.018763383477926256,
"adaptive_weight/frontier_coverage_15": 0.018763383477926256,
"adaptive_weight/frontier_coverage_20": 0.018789494037628175,
"adaptive_weight/frontier_coverage_25": 0.018993837013840675,
"adaptive_weight/frontier_coverage_5": 0.018763383477926256,
"adaptive_weight/frontier_ece_reward": 0.14445513784885405,
"adaptive_weight/frontier_entropy_batch_reward": 0.17529231011867524,
"calibration/aurc": 0.33088350022860324,
"calibration/batch_distribution_entropy": 0.9679954682002816,
"calibration/batch_entropy_100bins": 0.7056431213138912,
"calibration/batch_entropy_10bins": 0.9679954682002816,
"calibration/batch_entropy_50bins": 0.8047735588594278,
"calibration/batch_uniqueness": 0.8946394081391393,
"calibration/buffer_distribution_entropy": 0.9776141899564792,
"calibration/buffer_entropy_100bins": 0.7140486705625073,
"calibration/buffer_entropy_10bins": 0.9776141899564792,
"calibration/buffer_entropy_50bins": 0.8130714209116869,
"calibration/confidence_entropy": 0.4649555720368629,
"calibration/coverage@0%": 0.0265625,
"calibration/coverage@1%": 0.0265625,
"calibration/coverage@10%": 0.08795407289628179,
"calibration/coverage@15%": 0.22830005503913892,
"calibration/coverage@20%": 0.36429106531311156,
"calibration/coverage@25%": 0.46940053204500976,
"calibration/coverage@30%": 0.5401395853718199,
"calibration/coverage@5%": 0.032421875,
"calibration/ece": 0.13667109285066767,
"calibration/mean_confidence": 0.46604109651994363,
"calibration/prompt_uniqueness": 0.7341625138202393,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 912.2,
"completions/max_terminated_length": 510.2,
"completions/mean_length": 212.197265625,
"completions/mean_terminated_length": 211.68154907226562,
"completions/min_length": 91.8,
"completions/min_terminated_length": 91.8,
"epoch": 0.592,
"grad_norm": 0.0012193727307021618,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 628167970.0,
"reward": 0.7940310955047607,
"reward_std": 0.07889594733715058,
"rewards/accuracy_reward": 0.5125,
"rewards/brier_reward": 0.7858376264572143,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.00308399866335094,
"rewards/frontier_coverage_1": 0.12847063392400743,
"rewards/frontier_coverage_10": 0.12847063392400743,
"rewards/frontier_coverage_15": 0.12847063392400743,
"rewards/frontier_coverage_20": 0.11759204119443893,
"rewards/frontier_coverage_25": 0.08462818264961243,
"rewards/frontier_coverage_5": 0.12847063392400743,
"rewards/frontier_ece_reward": 0.007113531790673732,
"rewards/frontier_entropy_batch_reward": -0.07595221474766731,
"signal/accuracy_reward/centered_abs_mean": 0.0996337890625,
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
"signal/accuracy_reward/group_std_mean": 0.1323814406991005,
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04981689453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04981689453125,
"signal/advantage_abs_mean": 0.06043521165847778,
"signal/advantage_pre_scale_abs_mean": 0.06043521165847778,
"signal/advantage_pre_scale_std": 0.10175900906324387,
"signal/advantage_std": 0.10175900906324387,
"signal/brier_reward/centered_abs_mean": 0.1380321741104126,
"signal/brier_reward/group_bin_occupancy": 0.804296875,
"signal/brier_reward/group_std_mean": 0.17737728655338286,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0064600460231304165,
"signal/brier_reward/weight": 0.04680159762501716,
"signal/brier_reward/weighted_centered_abs_mean": 0.0064600460231304165,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086068242787,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030496908351778985,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.676953125,
"signal/frontier_aurc_reward/group_std_mean": 0.004931708890944719,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.378361213137396e-05,
"signal/frontier_aurc_reward/weight": 0.02091408334672451,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.378361213137396e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19160583317279817,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_1/group_std_mean": 0.24507599472999572,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035951600410044195,
"signal/frontier_coverage_1/weight": 0.018763383477926256,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035951600410044195,
"signal/frontier_coverage_10/centered_abs_mean": 0.19160583317279817,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_10/group_std_mean": 0.24507599472999572,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035951600410044195,
"signal/frontier_coverage_10/weight": 0.018763383477926256,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035951600410044195,
"signal/frontier_coverage_15/centered_abs_mean": 0.19160583317279817,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_15/group_std_mean": 0.24507599472999572,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035951600410044195,
"signal/frontier_coverage_15/weight": 0.018763383477926256,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035951600410044195,
"signal/frontier_coverage_20/centered_abs_mean": 0.1756891280412674,
"signal/frontier_coverage_20/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_20/group_std_mean": 0.22524542212486268,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003301054053008556,
"signal/frontier_coverage_20/weight": 0.018789494037628175,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003301054053008556,
"signal/frontier_coverage_25/centered_abs_mean": 0.11949324905872345,
"signal/frontier_coverage_25/group_bin_occupancy": 0.850390625,
"signal/frontier_coverage_25/group_std_mean": 0.15409801304340362,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022695485502481462,
"signal/frontier_coverage_25/weight": 0.018993837013840675,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022695485502481462,
"signal/frontier_coverage_5/centered_abs_mean": 0.19160583317279817,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_5/group_std_mean": 0.24507599472999572,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035951600410044195,
"signal/frontier_coverage_5/weight": 0.018763383477926256,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035951600410044195,
"signal/frontier_ece_reward/centered_abs_mean": 0.009413376450538635,
"signal/frontier_ece_reward/group_bin_occupancy": 0.753125,
"signal/frontier_ece_reward/group_std_mean": 0.01191569771617651,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013597635086625814,
"signal/frontier_ece_reward/weight": 0.14445513784885405,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013597635086625814,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10269325971603394,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.54453125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1308152124285698,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018002053909003734,
"signal/frontier_entropy_batch_reward/weight": 0.17529231011867524,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018002053909003734,
"step": 185
},
{
"adaptive_ema/accuracy_reward": 0.45314128502077644,
"adaptive_ema/brier_reward": 0.7017613023013852,
"adaptive_ema/format_reward": 0.9437244439127237,
"adaptive_ema/frontier_aurc_reward": 0.051766875792472265,
"adaptive_ema/frontier_coverage_1": 0.15052466761584,
"adaptive_ema/frontier_coverage_10": 0.15052466761584,
"adaptive_ema/frontier_coverage_15": 0.15052466761584,
"adaptive_ema/frontier_coverage_20": 0.1489783495752756,
"adaptive_ema/frontier_coverage_25": 0.13857511475546141,
"adaptive_ema/frontier_coverage_5": 0.15052466761584,
"adaptive_ema/frontier_ece_reward": 0.06204476964928107,
"adaptive_ema/frontier_entropy_batch_reward": -0.1309122085854708,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04612754210829735,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021001752093434335,
"adaptive_weight/frontier_coverage_1": 0.01881442852318287,
"adaptive_weight/frontier_coverage_10": 0.01881442852318287,
"adaptive_weight/frontier_coverage_15": 0.01881442852318287,
"adaptive_weight/frontier_coverage_20": 0.018848678469657897,
"adaptive_weight/frontier_coverage_25": 0.019079096987843512,
"adaptive_weight/frontier_coverage_5": 0.01881442852318287,
"adaptive_weight/frontier_ece_reward": 0.14507063925266267,
"adaptive_weight/frontier_entropy_batch_reward": 0.17491456866264343,
"calibration/aurc": 0.2595289017864723,
"calibration/batch_distribution_entropy": 0.9623724306524405,
"calibration/batch_entropy_100bins": 0.6852396221141621,
"calibration/batch_entropy_10bins": 0.9623724306524405,
"calibration/batch_entropy_50bins": 0.7850811373268268,
"calibration/batch_uniqueness": 0.8823629921002517,
"calibration/buffer_distribution_entropy": 0.9781941054039418,
"calibration/buffer_entropy_100bins": 0.7150254784873155,
"calibration/buffer_entropy_10bins": 0.9781941054039418,
"calibration/buffer_entropy_50bins": 0.8138911069544555,
"calibration/confidence_entropy": 0.463122295750538,
"calibration/coverage@0%": 0.016409325787401573,
"calibration/coverage@1%": 0.016409325787401573,
"calibration/coverage@10%": 0.21656311515748033,
"calibration/coverage@15%": 0.2942974901574803,
"calibration/coverage@20%": 0.43224963090551183,
"calibration/coverage@25%": 0.5448080708661418,
"calibration/coverage@30%": 0.6462567667322835,
"calibration/coverage@5%": 0.05937807578740158,
"calibration/ece": 0.10539165292814963,
"calibration/mean_confidence": 0.44898269623523623,
"calibration/prompt_uniqueness": 0.7121562101403061,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1081.6,
"completions/max_terminated_length": 749.2,
"completions/mean_length": 215.37314453125,
"completions/mean_terminated_length": 214.9864929199219,
"completions/min_length": 79.0,
"completions/min_terminated_length": 79.0,
"epoch": 0.608,
"grad_norm": 0.0009518972947262228,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 645372879.0,
"reward": 0.802846360206604,
"reward_std": 0.07410637587308884,
"rewards/accuracy_reward": 0.5197265625,
"rewards/brier_reward": 0.8024592280387879,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0026110153179615737,
"rewards/frontier_coverage_1": 0.14084916114807128,
"rewards/frontier_coverage_10": 0.14084916114807128,
"rewards/frontier_coverage_15": 0.14084916114807128,
"rewards/frontier_coverage_20": 0.1310635909438133,
"rewards/frontier_coverage_25": 0.09141481071710586,
"rewards/frontier_coverage_5": 0.14084916114807128,
"rewards/frontier_ece_reward": 0.007165602501481771,
"rewards/frontier_entropy_batch_reward": -0.054858258366584776,
"signal/accuracy_reward/centered_abs_mean": 0.09466552734375,
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
"signal/accuracy_reward/group_std_mean": 0.12934612184762956,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047332763671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047332763671875,
"signal/advantage_abs_mean": 0.05499119162559509,
"signal/advantage_pre_scale_abs_mean": 0.05499119162559509,
"signal/advantage_pre_scale_std": 0.09592062830924988,
"signal/advantage_std": 0.09592062830924988,
"signal/brier_reward/centered_abs_mean": 0.12933626472949983,
"signal/brier_reward/group_bin_occupancy": 0.810546875,
"signal/brier_reward/group_std_mean": 0.16631879806518554,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00596773847937584,
"signal/brier_reward/weight": 0.04612754210829735,
"signal/brier_reward/weighted_centered_abs_mean": 0.00596773847937584,
"signal/format_reward/centered_abs_mean": 0.001251220703125,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0027073150966316463,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006256103515625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006256103515625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023603408131748437,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6890625,
"signal/frontier_aurc_reward/group_std_mean": 0.0038498918525874614,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.957094206474722e-05,
"signal/frontier_aurc_reward/weight": 0.021001752093434335,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.957094206474722e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1896822929382324,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_1/group_std_mean": 0.24189280569553376,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035686347633600234,
"signal/frontier_coverage_1/weight": 0.01881442852318287,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035686347633600234,
"signal/frontier_coverage_10/centered_abs_mean": 0.1896822929382324,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_10/group_std_mean": 0.24189280569553376,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035686347633600234,
"signal/frontier_coverage_10/weight": 0.01881442852318287,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035686347633600234,
"signal/frontier_coverage_15/centered_abs_mean": 0.1896822929382324,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_15/group_std_mean": 0.24189280569553376,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035686347633600234,
"signal/frontier_coverage_15/weight": 0.01881442852318287,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035686347633600234,
"signal/frontier_coverage_20/centered_abs_mean": 0.17528200447559356,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8390625,
"signal/frontier_coverage_20/group_std_mean": 0.2239384174346924,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033036709763109686,
"signal/frontier_coverage_20/weight": 0.018848678469657897,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033036709763109686,
"signal/frontier_coverage_25/centered_abs_mean": 0.11379878968000412,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8375,
"signal/frontier_coverage_25/group_std_mean": 0.14621945917606355,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021709577180445195,
"signal/frontier_coverage_25/weight": 0.019079096987843512,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021709577180445195,
"signal/frontier_coverage_5/centered_abs_mean": 0.1896822929382324,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_5/group_std_mean": 0.24189280569553376,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035686347633600234,
"signal/frontier_coverage_5/weight": 0.01881442852318287,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035686347633600234,
"signal/frontier_ece_reward/centered_abs_mean": 0.00886353775858879,
"signal/frontier_ece_reward/group_bin_occupancy": 0.728515625,
"signal/frontier_ece_reward/group_std_mean": 0.011153610236942769,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012857463909313083,
"signal/frontier_ece_reward/weight": 0.14507063925266267,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012857463909313083,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08986649960279465,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5265625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.11396473497152329,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015717819705605506,
"signal/frontier_entropy_batch_reward/weight": 0.17491456866264343,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015717819705605506,
"step": 190
},
{
"adaptive_ema/accuracy_reward": 0.4568694495063633,
"adaptive_ema/brier_reward": 0.7067290831899434,
"adaptive_ema/format_reward": 0.9464434331151395,
"adaptive_ema/frontier_aurc_reward": 0.049100899068368974,
"adaptive_ema/frontier_coverage_1": 0.14959135911548901,
"adaptive_ema/frontier_coverage_10": 0.14959135911548901,
"adaptive_ema/frontier_coverage_15": 0.14959135911548901,
"adaptive_ema/frontier_coverage_20": 0.14772546548971083,
"adaptive_ema/frontier_coverage_25": 0.13591206081688326,
"adaptive_ema/frontier_coverage_5": 0.14959135911548901,
"adaptive_ema/frontier_ece_reward": 0.05933345631108433,
"adaptive_ema/frontier_entropy_batch_reward": -0.12668591113333794,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.045429503172636033,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021093441545963286,
"adaptive_weight/frontier_coverage_1": 0.01886429451406002,
"adaptive_weight/frontier_coverage_10": 0.01886429451406002,
"adaptive_weight/frontier_coverage_15": 0.01886429451406002,
"adaptive_weight/frontier_coverage_20": 0.018905685842037202,
"adaptive_weight/frontier_coverage_25": 0.019167742878198623,
"adaptive_weight/frontier_coverage_5": 0.01886429451406002,
"adaptive_weight/frontier_ece_reward": 0.14571548700332643,
"adaptive_weight/frontier_entropy_batch_reward": 0.17453096210956573,
"calibration/aurc": 0.2717893891009608,
"calibration/batch_distribution_entropy": 0.9790635308837882,
"calibration/batch_entropy_100bins": 0.6924911901826357,
"calibration/batch_entropy_10bins": 0.9790635308837882,
"calibration/batch_entropy_50bins": 0.7929727048707902,
"calibration/batch_uniqueness": 0.8949964966791282,
"calibration/buffer_distribution_entropy": 0.9789259595572238,
"calibration/buffer_entropy_100bins": 0.7148686390497873,
"calibration/buffer_entropy_10bins": 0.9789259595572238,
"calibration/buffer_entropy_50bins": 0.8139313132185979,
"calibration/confidence_entropy": 0.4959137955381828,
"calibration/coverage@0%": 0.008594514432485323,
"calibration/coverage@1%": 0.008594514432485323,
"calibration/coverage@10%": 0.09182821673189824,
"calibration/coverage@15%": 0.2587114726027397,
"calibration/coverage@20%": 0.40290866560665356,
"calibration/coverage@25%": 0.490063906555773,
"calibration/coverage@30%": 0.5912839408023484,
"calibration/coverage@5%": 0.008594514432485323,
"calibration/ece": 0.10975409017245596,
"calibration/mean_confidence": 0.4828339189395792,
"calibration/prompt_uniqueness": 0.7414707275136576,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 767.0,
"completions/max_terminated_length": 592.4,
"completions/mean_length": 214.82958984375,
"completions/mean_terminated_length": 214.31283569335938,
"completions/min_length": 97.6,
"completions/min_terminated_length": 97.6,
"epoch": 0.624,
"grad_norm": 0.0009802606655284762,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 662916638.0,
"reward": 0.807462728023529,
"reward_std": 0.07975933402776718,
"rewards/accuracy_reward": 0.5240234375,
"rewards/brier_reward": 0.7986777186393738,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.002779633179306984,
"rewards/frontier_coverage_1": 0.12782124131917955,
"rewards/frontier_coverage_10": 0.12782124131917955,
"rewards/frontier_coverage_15": 0.12782124131917955,
"rewards/frontier_coverage_20": 0.11921733915805817,
"rewards/frontier_coverage_25": 0.07854233682155609,
"rewards/frontier_coverage_5": 0.12782124131917955,
"rewards/frontier_ece_reward": 0.006253256555646658,
"rewards/frontier_entropy_batch_reward": -0.0283002408221364,
"signal/accuracy_reward/centered_abs_mean": 0.1125732421875,
"signal/accuracy_reward/group_bin_occupancy": 0.1765625,
"signal/accuracy_reward/group_std_mean": 0.14600562155246735,
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05628662109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05628662109375,
"signal/advantage_abs_mean": 0.06189834251999855,
"signal/advantage_pre_scale_abs_mean": 0.06189834251999855,
"signal/advantage_pre_scale_std": 0.10502578765153885,
"signal/advantage_std": 0.10502578765153885,
"signal/brier_reward/centered_abs_mean": 0.13457954227924346,
"signal/brier_reward/group_bin_occupancy": 0.828515625,
"signal/brier_reward/group_std_mean": 0.1729972928762436,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006113854143768549,
"signal/brier_reward/weight": 0.045429503172636033,
"signal/brier_reward/weighted_centered_abs_mean": 0.006113854143768549,
"signal/format_reward/centered_abs_mean": 0.000933837890625,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0024258274119347335,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026384861208498477,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.682421875,
"signal/frontier_aurc_reward/group_std_mean": 0.004262262210249901,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5659517965978014e-05,
"signal/frontier_aurc_reward/weight": 0.021093441545963286,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5659517965978014e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1896256685256958,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_1/group_std_mean": 0.24310458302497864,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003577108820900321,
"signal/frontier_coverage_1/weight": 0.01886429451406002,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003577108820900321,
"signal/frontier_coverage_10/centered_abs_mean": 0.1896256685256958,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_10/group_std_mean": 0.24310458302497864,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003577108820900321,
"signal/frontier_coverage_10/weight": 0.01886429451406002,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003577108820900321,
"signal/frontier_coverage_15/centered_abs_mean": 0.1896256685256958,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_15/group_std_mean": 0.24310458302497864,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003577108820900321,
"signal/frontier_coverage_15/weight": 0.01886429451406002,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003577108820900321,
"signal/frontier_coverage_20/centered_abs_mean": 0.17169637084007264,
"signal/frontier_coverage_20/group_bin_occupancy": 0.853125,
"signal/frontier_coverage_20/group_std_mean": 0.22037405371665955,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003246018523350358,
"signal/frontier_coverage_20/weight": 0.018905685842037202,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003246018523350358,
"signal/frontier_coverage_25/centered_abs_mean": 0.10143829137086868,
"signal/frontier_coverage_25/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_25/group_std_mean": 0.13183338940143585,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019442901015281678,
"signal/frontier_coverage_25/weight": 0.019167742878198623,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019442901015281678,
"signal/frontier_coverage_5/centered_abs_mean": 0.1896256685256958,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_5/group_std_mean": 0.24310458302497864,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003577108820900321,
"signal/frontier_coverage_5/weight": 0.01886429451406002,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003577108820900321,
"signal/frontier_ece_reward/centered_abs_mean": 0.008129092678427697,
"signal/frontier_ece_reward/group_bin_occupancy": 0.74375,
"signal/frontier_ece_reward/group_std_mean": 0.0103994682431221,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001184539240784943,
"signal/frontier_ece_reward/weight": 0.14571548700332643,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001184539240784943,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.060908643156290056,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.57578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.07627174183726311,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01063164221122861,
"signal/frontier_entropy_batch_reward/weight": 0.17453096210956573,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01063164221122861,
"step": 195
},
{
"adaptive_ema/accuracy_reward": 0.4612620261651603,
"adaptive_ema/brier_reward": 0.711105192442022,
"adaptive_ema/format_reward": 0.9490527967788385,
"adaptive_ema/frontier_aurc_reward": 0.04655472575071493,
"adaptive_ema/frontier_coverage_1": 0.14780793208898826,
"adaptive_ema/frontier_coverage_10": 0.14780793208898826,
"adaptive_ema/frontier_coverage_15": 0.14780793208898826,
"adaptive_ema/frontier_coverage_20": 0.14574844953907218,
"adaptive_ema/frontier_coverage_25": 0.1326994788727595,
"adaptive_ema/frontier_coverage_5": 0.14780793208898826,
"adaptive_ema/frontier_ece_reward": 0.05672248766440877,
"adaptive_ema/frontier_entropy_batch_reward": -0.12287724913026521,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04479944705963135,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02117249444127083,
"adaptive_weight/frontier_coverage_1": 0.018924034759402276,
"adaptive_weight/frontier_coverage_10": 0.018924034759402276,
"adaptive_weight/frontier_coverage_15": 0.018924034759402276,
"adaptive_weight/frontier_coverage_20": 0.018969768285751344,
"adaptive_weight/frontier_coverage_25": 0.019259539246559144,
"adaptive_weight/frontier_coverage_5": 0.018924034759402276,
"adaptive_weight/frontier_ece_reward": 0.14627588391304017,
"adaptive_weight/frontier_entropy_batch_reward": 0.17412672638893129,
"calibration/aurc": 0.2997646550413567,
"calibration/batch_distribution_entropy": 0.9725840439062055,
"calibration/batch_entropy_100bins": 0.6915793095789328,
"calibration/batch_entropy_10bins": 0.9725840439062055,
"calibration/batch_entropy_50bins": 0.7940186093835395,
"calibration/batch_uniqueness": 0.8902102105981953,
"calibration/buffer_distribution_entropy": 0.9795616752685407,
"calibration/buffer_entropy_100bins": 0.7146034846747893,
"calibration/buffer_entropy_10bins": 0.9795616752685407,
"calibration/buffer_entropy_50bins": 0.8138375318699215,
"calibration/confidence_entropy": 0.4847713799679016,
"calibration/coverage@0%": 0.00234375,
"calibration/coverage@1%": 0.00234375,
"calibration/coverage@10%": 0.225,
"calibration/coverage@15%": 0.3203125,
"calibration/coverage@20%": 0.351171875,
"calibration/coverage@25%": 0.3859375,
"calibration/coverage@30%": 0.580859375,
"calibration/coverage@5%": 0.075390625,
"calibration/ece": 0.17217692025440315,
"calibration/mean_confidence": 0.5315149217221136,
"calibration/prompt_uniqueness": 0.7357859346546566,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 1093.8,
"completions/max_terminated_length": 766.4,
"completions/mean_length": 219.97646484375,
"completions/mean_terminated_length": 219.20455932617188,
"completions/min_length": 102.8,
"completions/min_terminated_length": 102.8,
"epoch": 0.64,
"grad_norm": 0.0011383434757590294,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 680511885.0,
"reward": 0.8176485538482666,
"reward_std": 0.07317476570606232,
"rewards/accuracy_reward": 0.56357421875,
"rewards/brier_reward": 0.7944719076156617,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.002856502798385918,
"rewards/frontier_coverage_1": 0.09920123293995857,
"rewards/frontier_coverage_10": 0.09920123293995857,
"rewards/frontier_coverage_15": 0.09920123293995857,
"rewards/frontier_coverage_20": 0.09386955350637435,
"rewards/frontier_coverage_25": 0.06195079907774925,
"rewards/frontier_coverage_5": 0.09920123293995857,
"rewards/frontier_ece_reward": 0.005887755658477545,
"rewards/frontier_entropy_batch_reward": -0.06208570748567581,
"signal/accuracy_reward/centered_abs_mean": 0.090887451171875,
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
"signal/accuracy_reward/group_std_mean": 0.12432538270950318,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0454437255859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0454437255859375,
"signal/advantage_abs_mean": 0.05513864755630493,
"signal/advantage_pre_scale_abs_mean": 0.05513864755630493,
"signal/advantage_pre_scale_std": 0.09758671969175339,
"signal/advantage_std": 0.09758671969175339,
"signal/brier_reward/centered_abs_mean": 0.12907020896673202,
"signal/brier_reward/group_bin_occupancy": 0.8046875,
"signal/brier_reward/group_std_mean": 0.1680096834897995,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0057831091806292536,
"signal/brier_reward/weight": 0.04479944705963135,
"signal/brier_reward/weighted_centered_abs_mean": 0.0057831091806292536,
"signal/format_reward/centered_abs_mean": 0.00106201171875,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.002154887979850173,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000531005859375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000531005859375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029212284367531537,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.66796875,
"signal/frontier_aurc_reward/group_std_mean": 0.004778983537107706,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.185173115227371e-05,
"signal/frontier_aurc_reward/weight": 0.02117249444127083,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.185173115227371e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16748106777667998,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85078125,
"signal/frontier_coverage_1/group_std_mean": 0.21731913089752197,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003169310325756669,
"signal/frontier_coverage_1/weight": 0.018924034759402276,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003169310325756669,
"signal/frontier_coverage_10/centered_abs_mean": 0.16748106777667998,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85078125,
"signal/frontier_coverage_10/group_std_mean": 0.21731913089752197,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003169310325756669,
"signal/frontier_coverage_10/weight": 0.018924034759402276,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003169310325756669,
"signal/frontier_coverage_15/centered_abs_mean": 0.16748106777667998,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85078125,
"signal/frontier_coverage_15/group_std_mean": 0.21731913089752197,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003169310325756669,
"signal/frontier_coverage_15/weight": 0.018924034759402276,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003169310325756669,
"signal/frontier_coverage_20/centered_abs_mean": 0.1481163650751114,
"signal/frontier_coverage_20/group_bin_occupancy": 0.839453125,
"signal/frontier_coverage_20/group_std_mean": 0.19266715049743652,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002809662418439984,
"signal/frontier_coverage_20/weight": 0.018969768285751344,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002809662418439984,
"signal/frontier_coverage_25/centered_abs_mean": 0.08351867049932479,
"signal/frontier_coverage_25/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_25/group_std_mean": 0.10931529998779296,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016084318980574609,
"signal/frontier_coverage_25/weight": 0.019259539246559144,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016084318980574609,
"signal/frontier_coverage_5/centered_abs_mean": 0.16748106777667998,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85078125,
"signal/frontier_coverage_5/group_std_mean": 0.21731913089752197,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003169310325756669,
"signal/frontier_coverage_5/weight": 0.018924034759402276,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003169310325756669,
"signal/frontier_ece_reward/centered_abs_mean": 0.007508605439215899,
"signal/frontier_ece_reward/group_bin_occupancy": 0.71875,
"signal/frontier_ece_reward/group_std_mean": 0.009579764865338802,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010982972104102372,
"signal/frontier_ece_reward/weight": 0.14627588391304017,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010982972104102372,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08711727261543274,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.54375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10769864320755004,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015169884078204632,
"signal/frontier_entropy_batch_reward/weight": 0.17412672638893129,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015169884078204632,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.5075719520741389,
"eval_calibration/batch_distribution_entropy": 0.8955715306096013,
"eval_calibration/batch_entropy_100bins": 0.5784960452681176,
"eval_calibration/batch_entropy_10bins": 0.8955715306096013,
"eval_calibration/batch_entropy_50bins": 0.6759995447475304,
"eval_calibration/batch_uniqueness": 0.8388671875,
"eval_calibration/buffer_distribution_entropy": 0.9798978060512744,
"eval_calibration/buffer_entropy_100bins": 0.7148025692200701,
"eval_calibration/buffer_entropy_10bins": 0.9798978060512744,
"eval_calibration/buffer_entropy_50bins": 0.8141065825865851,
"eval_calibration/confidence_entropy": 0.46110577371626493,
"eval_calibration/coverage@0%": 0.0390625,
"eval_calibration/coverage@1%": 0.0390625,
"eval_calibration/coverage@10%": 0.0390625,
"eval_calibration/coverage@15%": 0.0390625,
"eval_calibration/coverage@20%": 0.125,
"eval_calibration/coverage@25%": 0.125,
"eval_calibration/coverage@30%": 0.140625,
"eval_calibration/coverage@5%": 0.0390625,
"eval_calibration/ece": 0.22523437500000001,
"eval_calibration/mean_confidence": 0.464296875,
"eval_calibration/prompt_uniqueness": 0.8388671875,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 377.25,
"eval_completions/max_terminated_length": 377.25,
"eval_completions/mean_length": 220.45864868164062,
"eval_completions/mean_terminated_length": 220.45864868164062,
"eval_completions/min_length": 121.25,
"eval_completions/min_terminated_length": 121.25,
"eval_loss": 0.0,
"eval_num_tokens": 680511885.0,
"eval_reward": 0.7376370877027512,
"eval_reward_std": 0.2244972214102745,
"eval_rewards/accuracy_reward": 0.423828125,
"eval_rewards/brier_reward": 0.776837483048439,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0033958147396333516,
"eval_rewards/frontier_coverage_1": 0.17307812348008156,
"eval_rewards/frontier_coverage_10": 0.17307812348008156,
"eval_rewards/frontier_coverage_15": 0.17307812348008156,
"eval_rewards/frontier_coverage_20": 0.15501171723008156,
"eval_rewards/frontier_coverage_25": 0.08437968976795673,
"eval_rewards/frontier_coverage_5": 0.17307812348008156,
"eval_rewards/frontier_ece_reward": 0.00601613090839237,
"eval_rewards/frontier_entropy_batch_reward": -0.15737152099609375,
"eval_runtime": 20.4579,
"eval_samples_per_second": 24.44,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4764404296875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4955976828932762,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23822021484375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23822021484375,
"eval_signal/advantage_abs_mean": 0.206316988915205,
"eval_signal/advantage_pre_scale_abs_mean": 0.206316988915205,
"eval_signal/advantage_pre_scale_std": 0.22218631953001022,
"eval_signal/advantage_std": 0.22218631953001022,
"eval_signal/brier_reward/centered_abs_mean": 0.20140916854143143,
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
"eval_signal/brier_reward/group_std_mean": 0.24949810281395912,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008972602896392345,
"eval_signal/brier_reward/weight": 0.0445491299033165,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.008972602896392345,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004036986967548728,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7578125,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0066258255392313,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.55931230034912e-05,
"eval_signal/frontier_aurc_reward/weight": 0.02120222896337509,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.55931230034912e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3758217468857765,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_1/group_std_mean": 0.46960294246673584,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007119854330085218,
"eval_signal/frontier_coverage_1/weight": 0.018944764509797096,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007119854330085218,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3758217468857765,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_10/group_std_mean": 0.46960294246673584,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007119854330085218,
"eval_signal/frontier_coverage_10/weight": 0.018944764509797096,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007119854330085218,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3758217468857765,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_15/group_std_mean": 0.46960294246673584,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007119854330085218,
"eval_signal/frontier_coverage_15/weight": 0.018944764509797096,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007119854330085218,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3399077132344246,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.42540228366851807,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006456138915382326,
"eval_signal/frontier_coverage_20/weight": 0.0189937986433506,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006456138915382326,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1638285294175148,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_25/group_std_mean": 0.21629228815436363,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031611379818059504,
"eval_signal/frontier_coverage_25/weight": 0.019295405596494675,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031611379818059504,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3758217468857765,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_5/group_std_mean": 0.46960294246673584,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007119854330085218,
"eval_signal/frontier_coverage_5/weight": 0.018944764509797096,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007119854330085218,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.01058367220684886,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.013392903376370668,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015503591566812247,
"eval_signal/frontier_ece_reward/weight": 0.14648593962192535,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015503591566812247,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21271085739135742,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5625,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.2711870074272156,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.037010504864156246,
"eval_signal/frontier_entropy_batch_reward/weight": 0.17399443686008453,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.037010504864156246,
"eval_steps_per_second": 0.196,
"step": 200
},
{
"adaptive_ema/accuracy_reward": 0.46409849546061094,
"adaptive_ema/brier_reward": 0.7145793846897758,
"adaptive_ema/format_reward": 0.9515267914964713,
"adaptive_ema/frontier_aurc_reward": 0.04411230896135079,
"adaptive_ema/frontier_coverage_1": 0.14622026044036013,
"adaptive_ema/frontier_coverage_10": 0.14622026044036013,
"adaptive_ema/frontier_coverage_15": 0.14622026044036013,
"adaptive_ema/frontier_coverage_20": 0.1436728228617165,
"adaptive_ema/frontier_coverage_25": 0.12934636947766204,
"adaptive_ema/frontier_coverage_5": 0.14622026044036013,
"adaptive_ema/frontier_ece_reward": 0.0542079172449618,
"adaptive_ema/frontier_entropy_batch_reward": -0.11904105378724132,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04429857730865479,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021244903653860094,
"adaptive_weight/frontier_coverage_1": 0.018975519016385077,
"adaptive_weight/frontier_coverage_10": 0.018975519016385077,
"adaptive_weight/frontier_coverage_15": 0.018975519016385077,
"adaptive_weight/frontier_coverage_20": 0.019032138213515282,
"adaptive_weight/frontier_coverage_25": 0.019350550696253778,
"adaptive_weight/frontier_coverage_5": 0.018975519016385077,
"adaptive_weight/frontier_ece_reward": 0.14679138362407684,
"adaptive_weight/frontier_entropy_batch_reward": 0.1736803650856018,
"calibration/aurc": 0.4643865706258444,
"calibration/batch_distribution_entropy": 0.9674062131351813,
"calibration/batch_entropy_100bins": 0.7019551646206266,
"calibration/batch_entropy_10bins": 0.9674062131351813,
"calibration/batch_entropy_50bins": 0.8050765440156435,
"calibration/batch_uniqueness": 0.8971722714759565,
"calibration/buffer_distribution_entropy": 0.9801938657893574,
"calibration/buffer_entropy_100bins": 0.7151813913822835,
"calibration/buffer_entropy_10bins": 0.9801938657893574,
"calibration/buffer_entropy_50bins": 0.8145700052375018,
"calibration/confidence_entropy": 0.499118723854232,
"calibration/coverage@0%": 0.0019554182974559687,
"calibration/coverage@1%": 0.0019554182974559687,
"calibration/coverage@10%": 0.0019554182974559687,
"calibration/coverage@15%": 0.0019554182974559687,
"calibration/coverage@20%": 0.031653620352250486,
"calibration/coverage@25%": 0.10196764921722115,
"calibration/coverage@30%": 0.13126834637964774,
"calibration/coverage@5%": 0.0019554182974559687,
"calibration/ece": 0.12957827013098205,
"calibration/mean_confidence": 0.4499429199375111,
"calibration/prompt_uniqueness": 0.7584693678459937,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 742.2,
"completions/max_terminated_length": 524.2,
"completions/mean_length": 221.072265625,
"completions/mean_terminated_length": 220.94376220703126,
"completions/min_length": 77.8,
"completions/min_terminated_length": 77.8,
"epoch": 0.656,
"grad_norm": 0.000815459294244647,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 697632209.0,
"reward": 0.7870907783508301,
"reward_std": 0.07756655365228653,
"rewards/accuracy_reward": 0.4931640625,
"rewards/brier_reward": 0.7724098086357116,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0036199231166392566,
"rewards/frontier_coverage_1": 0.12345087826251984,
"rewards/frontier_coverage_10": 0.12345087826251984,
"rewards/frontier_coverage_15": 0.12345087826251984,
"rewards/frontier_coverage_20": 0.10757764428853989,
"rewards/frontier_coverage_25": 0.06548063829541206,
"rewards/frontier_coverage_5": 0.12345087826251984,
"rewards/frontier_ece_reward": 0.0050185761414468285,
"rewards/frontier_entropy_batch_reward": -0.039812687784433365,
"signal/accuracy_reward/centered_abs_mean": 0.09989013671875,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.13554909825325012,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049945068359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049945068359375,
"signal/advantage_abs_mean": 0.05884964838624,
"signal/advantage_pre_scale_abs_mean": 0.05884964838624,
"signal/advantage_pre_scale_std": 0.10190211534500122,
"signal/advantage_std": 0.10190211534500122,
"signal/brier_reward/centered_abs_mean": 0.14087184071540831,
"signal/brier_reward/group_bin_occupancy": 0.8328125,
"signal/brier_reward/group_std_mean": 0.17969867587089539,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006240516621619463,
"signal/brier_reward/weight": 0.04429857730865479,
"signal/brier_reward/weighted_centered_abs_mean": 0.006240516621619463,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003362104669213295,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.671484375,
"signal/frontier_aurc_reward/group_std_mean": 0.005673701735213399,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.1434101118939e-05,
"signal/frontier_aurc_reward/weight": 0.021244903653860094,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.1434101118939e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18643584847450256,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_1/group_std_mean": 0.24093481302261352,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003537702839821577,
"signal/frontier_coverage_1/weight": 0.018975519016385077,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003537702839821577,
"signal/frontier_coverage_10/centered_abs_mean": 0.18643584847450256,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_10/group_std_mean": 0.24093481302261352,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003537702839821577,
"signal/frontier_coverage_10/weight": 0.018975519016385077,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003537702839821577,
"signal/frontier_coverage_15/centered_abs_mean": 0.18643584847450256,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_15/group_std_mean": 0.24093481302261352,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003537702839821577,
"signal/frontier_coverage_15/weight": 0.018975519016385077,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003537702839821577,
"signal/frontier_coverage_20/centered_abs_mean": 0.1628478139638901,
"signal/frontier_coverage_20/group_bin_occupancy": 0.861328125,
"signal/frontier_coverage_20/group_std_mean": 0.21116604804992675,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003099315334111452,
"signal/frontier_coverage_20/weight": 0.019032138213515282,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003099315334111452,
"signal/frontier_coverage_25/centered_abs_mean": 0.09084706604480744,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_25/group_std_mean": 0.11897308528423309,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017579408595338464,
"signal/frontier_coverage_25/weight": 0.019350550696253778,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017579408595338464,
"signal/frontier_coverage_5/centered_abs_mean": 0.18643584847450256,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_5/group_std_mean": 0.24093481302261352,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003537702839821577,
"signal/frontier_coverage_5/weight": 0.018975519016385077,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003537702839821577,
"signal/frontier_ece_reward/centered_abs_mean": 0.007442523166537285,
"signal/frontier_ece_reward/group_bin_occupancy": 0.75546875,
"signal/frontier_ece_reward/group_std_mean": 0.009600348770618439,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001092497631907463,
"signal/frontier_ece_reward/weight": 0.14679138362407684,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001092497631907463,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06799670606851578,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.538671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.08818065077066421,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01180825624614954,
"signal/frontier_entropy_batch_reward/weight": 0.1736803650856018,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01180825624614954,
"step": 205
},
{
"adaptive_ema/accuracy_reward": 0.4662320707951559,
"adaptive_ema/brier_reward": 0.7176291992470631,
"adaptive_ema/format_reward": 0.9538852598740333,
"adaptive_ema/frontier_aurc_reward": 0.041774003972822785,
"adaptive_ema/frontier_coverage_1": 0.14537093734430143,
"adaptive_ema/frontier_coverage_10": 0.14537093734430143,
"adaptive_ema/frontier_coverage_15": 0.14537093734430143,
"adaptive_ema/frontier_coverage_20": 0.1422973893660083,
"adaptive_ema/frontier_coverage_25": 0.12662304782701647,
"adaptive_ema/frontier_coverage_5": 0.14537093734430143,
"adaptive_ema/frontier_ece_reward": 0.05181695449107977,
"adaptive_ema/frontier_entropy_batch_reward": -0.11663399660211912,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.043847785145044324,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021307827904820442,
"adaptive_weight/frontier_coverage_1": 0.01900416538119316,
"adaptive_weight/frontier_coverage_10": 0.01900416538119316,
"adaptive_weight/frontier_coverage_15": 0.01900416538119316,
"adaptive_weight/frontier_coverage_20": 0.019072511792182924,
"adaptive_weight/frontier_coverage_25": 0.019421060755848884,
"adaptive_weight/frontier_coverage_5": 0.01900416538119316,
"adaptive_weight/frontier_ece_reward": 0.14723817110061646,
"adaptive_weight/frontier_entropy_batch_reward": 0.17339597642421722,
"calibration/aurc": 0.31164496797877933,
"calibration/batch_distribution_entropy": 0.9663074056547905,
"calibration/batch_entropy_100bins": 0.6888697683751974,
"calibration/batch_entropy_10bins": 0.9663074056547905,
"calibration/batch_entropy_50bins": 0.7888140527081898,
"calibration/batch_uniqueness": 0.8817779541015625,
"calibration/buffer_distribution_entropy": 0.9812080629481033,
"calibration/buffer_entropy_100bins": 0.7160872698038366,
"calibration/buffer_entropy_10bins": 0.9812080629481033,
"calibration/buffer_entropy_50bins": 0.8154865216539655,
"calibration/confidence_entropy": 0.45991284860584025,
"calibration/coverage@0%": 0.01796875,
"calibration/coverage@1%": 0.01796875,
"calibration/coverage@10%": 0.13046875,
"calibration/coverage@15%": 0.178515625,
"calibration/coverage@20%": 0.26328125,
"calibration/coverage@25%": 0.31796875,
"calibration/coverage@30%": 0.443359375,
"calibration/coverage@5%": 0.037890625,
"calibration/ece": 0.145015625,
"calibration/mean_confidence": 0.467203125,
"calibration/prompt_uniqueness": 0.693310546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 968.0,
"completions/max_terminated_length": 533.2,
"completions/mean_length": 220.28994140625,
"completions/mean_terminated_length": 220.0336151123047,
"completions/min_length": 95.2,
"completions/min_terminated_length": 95.2,
"epoch": 0.672,
"grad_norm": 0.0017700539901852608,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 714801418.0,
"reward": 0.7908611178398133,
"reward_std": 0.07649894952774047,
"rewards/accuracy_reward": 0.5140625,
"rewards/brier_reward": 0.7871044278144836,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.00326902256347239,
"rewards/frontier_coverage_1": 0.1457797884941101,
"rewards/frontier_coverage_10": 0.1457797884941101,
"rewards/frontier_coverage_15": 0.1457797884941101,
"rewards/frontier_coverage_20": 0.13143573254346846,
"rewards/frontier_coverage_25": 0.08514134362339973,
"rewards/frontier_coverage_5": 0.1457797884941101,
"rewards/frontier_ece_reward": 0.006153030414134264,
"rewards/frontier_entropy_batch_reward": -0.09628089219331741,
"signal/accuracy_reward/centered_abs_mean": 0.1017822265625,
"signal/accuracy_reward/group_bin_occupancy": 0.17421875,
"signal/accuracy_reward/group_std_mean": 0.13586196452379226,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05089111328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05089111328125,
"signal/advantage_abs_mean": 0.05836946368217468,
"signal/advantage_pre_scale_abs_mean": 0.05836946368217468,
"signal/advantage_pre_scale_std": 0.09875391572713851,
"signal/advantage_std": 0.09875391572713851,
"signal/brier_reward/centered_abs_mean": 0.14277728796005248,
"signal/brier_reward/group_bin_occupancy": 0.794140625,
"signal/brier_reward/group_std_mean": 0.18211204409599305,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00626028710976243,
"signal/brier_reward/weight": 0.043847785145044324,
"signal/brier_reward/weighted_centered_abs_mean": 0.00626028710976243,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003248645691201091,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6609375,
"signal/frontier_aurc_reward/group_std_mean": 0.005463304091244936,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.921743770362809e-05,
"signal/frontier_aurc_reward/weight": 0.021307827904820442,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.921743770362809e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2025707632303238,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8296875,
"signal/frontier_coverage_1/group_std_mean": 0.2575278103351593,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00384969306178391,
"signal/frontier_coverage_1/weight": 0.01900416538119316,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00384969306178391,
"signal/frontier_coverage_10/centered_abs_mean": 0.2025707632303238,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8296875,
"signal/frontier_coverage_10/group_std_mean": 0.2575278103351593,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00384969306178391,
"signal/frontier_coverage_10/weight": 0.01900416538119316,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00384969306178391,
"signal/frontier_coverage_15/centered_abs_mean": 0.2025707632303238,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8296875,
"signal/frontier_coverage_15/group_std_mean": 0.2575278103351593,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00384969306178391,
"signal/frontier_coverage_15/weight": 0.01900416538119316,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00384969306178391,
"signal/frontier_coverage_20/centered_abs_mean": 0.1766321986913681,
"signal/frontier_coverage_20/group_bin_occupancy": 0.82890625,
"signal/frontier_coverage_20/group_std_mean": 0.2250331699848175,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003368807211518288,
"signal/frontier_coverage_20/weight": 0.019072511792182924,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003368807211518288,
"signal/frontier_coverage_25/centered_abs_mean": 0.10147839039564133,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_25/group_std_mean": 0.13023962080478668,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019708910025656224,
"signal/frontier_coverage_25/weight": 0.019421060755848884,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019708910025656224,
"signal/frontier_coverage_5/centered_abs_mean": 0.2025707632303238,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8296875,
"signal/frontier_coverage_5/group_std_mean": 0.2575278103351593,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00384969306178391,
"signal/frontier_coverage_5/weight": 0.01900416538119316,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00384969306178391,
"signal/frontier_ece_reward/centered_abs_mean": 0.008110674936324358,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7265625,
"signal/frontier_ece_reward/group_std_mean": 0.010212619230151177,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00119422294665128,
"signal/frontier_ece_reward/weight": 0.14723817110061646,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00119422294665128,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10843141078948974,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.534375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.13856386244297028,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01880161128938198,
"signal/frontier_entropy_batch_reward/weight": 0.17339597642421722,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01880161128938198,
"step": 210
},
{
"adaptive_ema/accuracy_reward": 0.46830501246117856,
"adaptive_ema/brier_reward": 0.7211254637666132,
"adaptive_ema/format_reward": 0.9560803170351629,
"adaptive_ema/frontier_aurc_reward": 0.03958305982387139,
"adaptive_ema/frontier_coverage_1": 0.1453660869004376,
"adaptive_ema/frontier_coverage_10": 0.1453660869004376,
"adaptive_ema/frontier_coverage_15": 0.1453660869004376,
"adaptive_ema/frontier_coverage_20": 0.1413463773504267,
"adaptive_ema/frontier_coverage_25": 0.124265991660699,
"adaptive_ema/frontier_coverage_5": 0.1453660869004376,
"adaptive_ema/frontier_ece_reward": 0.0495718913967398,
"adaptive_ema/frontier_entropy_batch_reward": -0.11451944268600305,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.043339500576257704,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.0213736355304718,
"adaptive_weight/frontier_coverage_1": 0.019019480049610137,
"adaptive_weight/frontier_coverage_10": 0.019019480049610137,
"adaptive_weight/frontier_coverage_15": 0.019019480049610137,
"adaptive_weight/frontier_coverage_20": 0.019108938798308373,
"adaptive_weight/frontier_coverage_25": 0.019489056989550592,
"adaptive_weight/frontier_coverage_5": 0.019019480049610137,
"adaptive_weight/frontier_ece_reward": 0.1477048873901367,
"adaptive_weight/frontier_entropy_batch_reward": 0.17320606112480164,
"calibration/aurc": 0.33968256897510185,
"calibration/batch_distribution_entropy": 0.9683258088969448,
"calibration/batch_entropy_100bins": 0.6988123005977697,
"calibration/batch_entropy_10bins": 0.9683258088969448,
"calibration/batch_entropy_50bins": 0.8007284420968486,
"calibration/batch_uniqueness": 0.8925719595703125,
"calibration/buffer_distribution_entropy": 0.9820925332574466,
"calibration/buffer_entropy_100bins": 0.7173383975913089,
"calibration/buffer_entropy_10bins": 0.9820925332574466,
"calibration/buffer_entropy_50bins": 0.8165489914272855,
"calibration/confidence_entropy": 0.48792837399947214,
"calibration/coverage@0%": 0.00078125,
"calibration/coverage@1%": 0.00078125,
"calibration/coverage@10%": 0.027734375,
"calibration/coverage@15%": 0.14743124999999999,
"calibration/coverage@20%": 0.26663125,
"calibration/coverage@25%": 0.43825312499999997,
"calibration/coverage@30%": 0.5667187499999999,
"calibration/coverage@5%": 0.00078125,
"calibration/ece": 0.149918890625,
"calibration/mean_confidence": 0.454228140625,
"calibration/prompt_uniqueness": 0.721783203125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 586.0,
"completions/max_terminated_length": 586.0,
"completions/mean_length": 223.670703125,
"completions/mean_terminated_length": 223.670703125,
"completions/min_length": 104.2,
"completions/min_terminated_length": 104.2,
"epoch": 0.688,
"grad_norm": 0.000957864336669445,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 732045726.0,
"reward": 0.8029464364051819,
"reward_std": 0.07481988817453385,
"rewards/accuracy_reward": 0.53095703125,
"rewards/brier_reward": 0.7833613395690918,
"rewards/format_reward": 0.998828125,
"rewards/frontier_aurc_reward": -0.0027452859794721007,
"rewards/frontier_coverage_1": 0.116272484511137,
"rewards/frontier_coverage_10": 0.116272484511137,
"rewards/frontier_coverage_15": 0.116272484511137,
"rewards/frontier_coverage_20": 0.09603669866919518,
"rewards/frontier_coverage_25": 0.062441585958003996,
"rewards/frontier_coverage_5": 0.116272484511137,
"rewards/frontier_ece_reward": 0.005209988867864013,
"rewards/frontier_entropy_batch_reward": -0.0494648601859808,
"signal/accuracy_reward/centered_abs_mean": 0.102288818359375,
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
"signal/accuracy_reward/group_std_mean": 0.13836176693439484,
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0511444091796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0511444091796875,
"signal/advantage_abs_mean": 0.056684716045856474,
"signal/advantage_pre_scale_abs_mean": 0.056684716045856474,
"signal/advantage_pre_scale_std": 0.09830449968576431,
"signal/advantage_std": 0.09830449968576431,
"signal/brier_reward/centered_abs_mean": 0.13682132959365845,
"signal/brier_reward/group_bin_occupancy": 0.8078125,
"signal/brier_reward/group_std_mean": 0.1752842426300049,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005929951835423708,
"signal/brier_reward/weight": 0.043339500576257704,
"signal/brier_reward/weighted_centered_abs_mean": 0.005929951835423708,
"signal/format_reward/centered_abs_mean": 0.00146484375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.001537091750651598,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000732421875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000732421875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025705496780574323,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.690234375,
"signal/frontier_aurc_reward/group_std_mean": 0.004285382106900215,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.493503122124821e-05,
"signal/frontier_aurc_reward/weight": 0.0213736355304718,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.493503122124821e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19760897159576415,
"signal/frontier_coverage_1/group_bin_occupancy": 0.840625,
"signal/frontier_coverage_1/group_std_mean": 0.25142764747142793,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037585416808724403,
"signal/frontier_coverage_1/weight": 0.019019480049610137,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037585416808724403,
"signal/frontier_coverage_10/centered_abs_mean": 0.19760897159576415,
"signal/frontier_coverage_10/group_bin_occupancy": 0.840625,
"signal/frontier_coverage_10/group_std_mean": 0.25142764747142793,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037585416808724403,
"signal/frontier_coverage_10/weight": 0.019019480049610137,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037585416808724403,
"signal/frontier_coverage_15/centered_abs_mean": 0.19760897159576415,
"signal/frontier_coverage_15/group_bin_occupancy": 0.840625,
"signal/frontier_coverage_15/group_std_mean": 0.25142764747142793,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037585416808724403,
"signal/frontier_coverage_15/weight": 0.019019480049610137,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037585416808724403,
"signal/frontier_coverage_20/centered_abs_mean": 0.1612505316734314,
"signal/frontier_coverage_20/group_bin_occupancy": 0.83671875,
"signal/frontier_coverage_20/group_std_mean": 0.20602917075157165,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030814073979854584,
"signal/frontier_coverage_20/weight": 0.019108938798308373,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030814073979854584,
"signal/frontier_coverage_25/centered_abs_mean": 0.09459872543811798,
"signal/frontier_coverage_25/group_bin_occupancy": 0.861328125,
"signal/frontier_coverage_25/group_std_mean": 0.12124393731355668,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018436332466080785,
"signal/frontier_coverage_25/weight": 0.019489056989550592,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018436332466080785,
"signal/frontier_coverage_5/centered_abs_mean": 0.19760897159576415,
"signal/frontier_coverage_5/group_bin_occupancy": 0.840625,
"signal/frontier_coverage_5/group_std_mean": 0.25142764747142793,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037585416808724403,
"signal/frontier_coverage_5/weight": 0.019019480049610137,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037585416808724403,
"signal/frontier_ece_reward/centered_abs_mean": 0.007358336262404919,
"signal/frontier_ece_reward/group_bin_occupancy": 0.72421875,
"signal/frontier_ece_reward/group_std_mean": 0.009352485835552215,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010868682526051997,
"signal/frontier_ece_reward/weight": 0.1477048873901367,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010868682526051997,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08006046563386918,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.512890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1021534651517868,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013866147492080927,
"signal/frontier_entropy_batch_reward/weight": 0.17320606112480164,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013866147492080927,
"step": 215
},
{
"adaptive_ema/accuracy_reward": 0.4714226536770599,
"adaptive_ema/brier_reward": 0.7245687330855672,
"adaptive_ema/format_reward": 0.9582328185056775,
"adaptive_ema/frontier_aurc_reward": 0.037501169886741494,
"adaptive_ema/frontier_coverage_1": 0.1441238258614169,
"adaptive_ema/frontier_coverage_10": 0.1441238258614169,
"adaptive_ema/frontier_coverage_15": 0.1441238258614169,
"adaptive_ema/frontier_coverage_20": 0.13939415817970868,
"adaptive_ema/frontier_coverage_25": 0.12134546498309087,
"adaptive_ema/frontier_coverage_5": 0.1441238258614169,
"adaptive_ema/frontier_ece_reward": 0.047401946277577975,
"adaptive_ema/frontier_entropy_batch_reward": -0.11144402296807562,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04283955916762352,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02143755294382572,
"adaptive_weight/frontier_coverage_1": 0.019062766060233115,
"adaptive_weight/frontier_coverage_10": 0.019062766060233115,
"adaptive_weight/frontier_coverage_15": 0.019062766060233115,
"adaptive_weight/frontier_coverage_20": 0.019168110191822053,
"adaptive_weight/frontier_coverage_25": 0.019570105895400047,
"adaptive_weight/frontier_coverage_5": 0.019062766060233115,
"adaptive_weight/frontier_ece_reward": 0.14816365540027618,
"adaptive_weight/frontier_entropy_batch_reward": 0.1728699505329132,
"calibration/aurc": 0.2812223914615854,
"calibration/batch_distribution_entropy": 0.9758433970044276,
"calibration/batch_entropy_100bins": 0.6897804717683991,
"calibration/batch_entropy_10bins": 0.9758433970044276,
"calibration/batch_entropy_50bins": 0.7924209928664961,
"calibration/batch_uniqueness": 0.8869873046875,
"calibration/buffer_distribution_entropy": 0.9829288465120015,
"calibration/buffer_entropy_100bins": 0.7186293500721178,
"calibration/buffer_entropy_10bins": 0.9829288465120015,
"calibration/buffer_entropy_50bins": 0.8175469891347094,
"calibration/confidence_entropy": 0.46794092314030333,
"calibration/coverage@0%": 0.0046875,
"calibration/coverage@1%": 0.0046875,
"calibration/coverage@10%": 0.044140625,
"calibration/coverage@15%": 0.176953125,
"calibration/coverage@20%": 0.25234375,
"calibration/coverage@25%": 0.408984375,
"calibration/coverage@30%": 0.62265625,
"calibration/coverage@5%": 0.0046875,
"calibration/ece": 0.11982812500000002,
"calibration/mean_confidence": 0.496578125,
"calibration/prompt_uniqueness": 0.688330078125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 534.0,
"completions/max_terminated_length": 534.0,
"completions/mean_length": 224.83818359375,
"completions/mean_terminated_length": 224.83818359375,
"completions/min_length": 102.8,
"completions/min_terminated_length": 102.8,
"epoch": 0.704,
"grad_norm": 0.0006922923494130373,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 749214213.0,
"reward": 0.808210837841034,
"reward_std": 0.06902927160263062,
"rewards/accuracy_reward": 0.5416015625,
"rewards/brier_reward": 0.7921440362930298,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0030134733766317367,
"rewards/frontier_coverage_1": 0.1116558000445366,
"rewards/frontier_coverage_10": 0.1116558000445366,
"rewards/frontier_coverage_15": 0.1116558000445366,
"rewards/frontier_coverage_20": 0.09597670137882233,
"rewards/frontier_coverage_25": 0.06279801651835441,
"rewards/frontier_coverage_5": 0.1116558000445366,
"rewards/frontier_ece_reward": 0.005176219716668129,
"rewards/frontier_entropy_batch_reward": -0.05135298371315002,
"signal/accuracy_reward/centered_abs_mean": 0.08553466796875,
"signal/accuracy_reward/group_bin_occupancy": 0.169921875,
"signal/accuracy_reward/group_std_mean": 0.11762015819549561,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042767333984375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042767333984375,
"signal/advantage_abs_mean": 0.052474270761013034,
"signal/advantage_pre_scale_abs_mean": 0.052474270761013034,
"signal/advantage_pre_scale_std": 0.0917344942688942,
"signal/advantage_std": 0.0917344942688942,
"signal/brier_reward/centered_abs_mean": 0.13383509516716002,
"signal/brier_reward/group_bin_occupancy": 0.8109375,
"signal/brier_reward/group_std_mean": 0.17180217504501344,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005733540188521147,
"signal/brier_reward/weight": 0.04283955916762352,
"signal/brier_reward/weighted_centered_abs_mean": 0.005733540188521147,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028782275738194587,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6734375,
"signal/frontier_aurc_reward/group_std_mean": 0.0045921245589852335,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.170250489958561e-05,
"signal/frontier_aurc_reward/weight": 0.02143755294382572,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.170250489958561e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18142320215702057,
"signal/frontier_coverage_1/group_bin_occupancy": 0.840234375,
"signal/frontier_coverage_1/group_std_mean": 0.2332333356142044,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034584174398332836,
"signal/frontier_coverage_1/weight": 0.019062766060233115,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034584174398332836,
"signal/frontier_coverage_10/centered_abs_mean": 0.18142320215702057,
"signal/frontier_coverage_10/group_bin_occupancy": 0.840234375,
"signal/frontier_coverage_10/group_std_mean": 0.2332333356142044,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034584174398332836,
"signal/frontier_coverage_10/weight": 0.019062766060233115,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034584174398332836,
"signal/frontier_coverage_15/centered_abs_mean": 0.18142320215702057,
"signal/frontier_coverage_15/group_bin_occupancy": 0.840234375,
"signal/frontier_coverage_15/group_std_mean": 0.2332333356142044,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034584174398332836,
"signal/frontier_coverage_15/weight": 0.019062766060233115,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034584174398332836,
"signal/frontier_coverage_20/centered_abs_mean": 0.14190484583377838,
"signal/frontier_coverage_20/group_bin_occupancy": 0.831640625,
"signal/frontier_coverage_20/group_std_mean": 0.1834350824356079,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027199681848287583,
"signal/frontier_coverage_20/weight": 0.019168110191822053,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027199681848287583,
"signal/frontier_coverage_25/centered_abs_mean": 0.08425341546535492,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_25/group_std_mean": 0.1094443678855896,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016488022403791547,
"signal/frontier_coverage_25/weight": 0.019570105895400047,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016488022403791547,
"signal/frontier_coverage_5/centered_abs_mean": 0.18142320215702057,
"signal/frontier_coverage_5/group_bin_occupancy": 0.840234375,
"signal/frontier_coverage_5/group_std_mean": 0.2332333356142044,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034584174398332836,
"signal/frontier_coverage_5/weight": 0.019062766060233115,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034584174398332836,
"signal/frontier_ece_reward/centered_abs_mean": 0.007252698577940464,
"signal/frontier_ece_reward/group_bin_occupancy": 0.70625,
"signal/frontier_ece_reward/group_std_mean": 0.009230658039450645,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010745900450274347,
"signal/frontier_ece_reward/weight": 0.14816365540027618,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010745900450274347,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07674806118011475,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5296875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09836492389440536,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013267523981630803,
"signal/frontier_entropy_batch_reward/weight": 0.1728699505329132,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013267523981630803,
"step": 220
},
{
"adaptive_ema/accuracy_reward": 0.4757423695441325,
"adaptive_ema/brier_reward": 0.7279110297441888,
"adaptive_ema/format_reward": 0.9602750013781336,
"adaptive_ema/frontier_aurc_reward": 0.03552184370867116,
"adaptive_ema/frontier_coverage_1": 0.14200073041665678,
"adaptive_ema/frontier_coverage_10": 0.14200073041665678,
"adaptive_ema/frontier_coverage_15": 0.14200073041665678,
"adaptive_ema/frontier_coverage_20": 0.13670227580543237,
"adaptive_ema/frontier_coverage_25": 0.11821497614335949,
"adaptive_ema/frontier_coverage_5": 0.14200073041665678,
"adaptive_ema/frontier_ece_reward": 0.04532540390968216,
"adaptive_ema/frontier_entropy_batch_reward": -0.10826526292922997,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04234748482704163,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021495748311281204,
"adaptive_weight/frontier_coverage_1": 0.01912260763347149,
"adaptive_weight/frontier_coverage_10": 0.01912260763347149,
"adaptive_weight/frontier_coverage_15": 0.01912260763347149,
"adaptive_weight/frontier_coverage_20": 0.019240697100758552,
"adaptive_weight/frontier_coverage_25": 0.01965273208916187,
"adaptive_weight/frontier_coverage_5": 0.01912260763347149,
"adaptive_weight/frontier_ece_reward": 0.14858417212963104,
"adaptive_weight/frontier_entropy_batch_reward": 0.172488734126091,
"calibration/aurc": 0.26860360703296127,
"calibration/batch_distribution_entropy": 0.9832870149703636,
"calibration/batch_entropy_100bins": 0.7096184414476265,
"calibration/batch_entropy_10bins": 0.9832870149703636,
"calibration/batch_entropy_50bins": 0.8095542428846242,
"calibration/batch_uniqueness": 0.9003417386613531,
"calibration/buffer_distribution_entropy": 0.9841210799372659,
"calibration/buffer_entropy_100bins": 0.7201560903265631,
"calibration/buffer_entropy_10bins": 0.9841210799372659,
"calibration/buffer_entropy_50bins": 0.8186714930627087,
"calibration/confidence_entropy": 0.4777731746405983,
"calibration/coverage@0%": 0.002734375,
"calibration/coverage@1%": 0.002734375,
"calibration/coverage@10%": 0.18984375,
"calibration/coverage@15%": 0.24609375,
"calibration/coverage@20%": 0.37578125,
"calibration/coverage@25%": 0.55234375,
"calibration/coverage@30%": 0.629296875,
"calibration/coverage@5%": 0.1015625,
"calibration/ece": 0.14447926576259787,
"calibration/mean_confidence": 0.4992754890074608,
"calibration/prompt_uniqueness": 0.7325255776047086,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 913.6,
"completions/max_terminated_length": 521.0,
"completions/mean_length": 223.48916015625,
"completions/mean_terminated_length": 223.23304748535156,
"completions/min_length": 101.4,
"completions/min_terminated_length": 101.4,
"epoch": 0.72,
"grad_norm": 0.0009339398820884526,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 766512598.0,
"reward": 0.8165707230567932,
"reward_std": 0.07224588543176651,
"rewards/accuracy_reward": 0.55751953125,
"rewards/brier_reward": 0.7996157526969909,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002686341805383563,
"rewards/frontier_coverage_1": 0.10848188325762749,
"rewards/frontier_coverage_10": 0.10848188325762749,
"rewards/frontier_coverage_15": 0.10848188325762749,
"rewards/frontier_coverage_20": 0.09124723896384239,
"rewards/frontier_coverage_25": 0.06009881421923637,
"rewards/frontier_coverage_5": 0.10848188325762749,
"rewards/frontier_ece_reward": 0.0052391432225704195,
"rewards/frontier_entropy_batch_reward": -0.04627900570631027,
"signal/accuracy_reward/centered_abs_mean": 0.093353271484375,
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
"signal/accuracy_reward/group_std_mean": 0.12714605182409286,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0466766357421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0466766357421875,
"signal/advantage_abs_mean": 0.05387095212936401,
"signal/advantage_pre_scale_abs_mean": 0.05387095212936401,
"signal/advantage_pre_scale_std": 0.0951445609331131,
"signal/advantage_std": 0.0951445609331131,
"signal/brier_reward/centered_abs_mean": 0.12789989858865738,
"signal/brier_reward/group_bin_occupancy": 0.81640625,
"signal/brier_reward/group_std_mean": 0.16612654328346252,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005415673460811376,
"signal/brier_reward/weight": 0.04234748482704163,
"signal/brier_reward/weighted_centered_abs_mean": 0.005415673460811376,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025467033963650467,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6765625,
"signal/frontier_aurc_reward/group_std_mean": 0.004340066667646169,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4739906045142564e-05,
"signal/frontier_aurc_reward/weight": 0.021495748311281204,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4739906045142564e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17897019982337953,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85,
"signal/frontier_coverage_1/group_std_mean": 0.2304094761610031,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003422551741823554,
"signal/frontier_coverage_1/weight": 0.01912260763347149,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003422551741823554,
"signal/frontier_coverage_10/centered_abs_mean": 0.17897019982337953,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85,
"signal/frontier_coverage_10/group_std_mean": 0.2304094761610031,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003422551741823554,
"signal/frontier_coverage_10/weight": 0.01912260763347149,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003422551741823554,
"signal/frontier_coverage_15/centered_abs_mean": 0.17897019982337953,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
"signal/frontier_coverage_15/group_std_mean": 0.2304094761610031,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003422551741823554,
"signal/frontier_coverage_15/weight": 0.01912260763347149,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003422551741823554,
"signal/frontier_coverage_20/centered_abs_mean": 0.13409923911094665,
"signal/frontier_coverage_20/group_bin_occupancy": 0.837109375,
"signal/frontier_coverage_20/group_std_mean": 0.17386512756347655,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025803008582443,
"signal/frontier_coverage_20/weight": 0.019240697100758552,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025803008582443,
"signal/frontier_coverage_25/centered_abs_mean": 0.07745042741298676,
"signal/frontier_coverage_25/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_25/group_std_mean": 0.10080204159021378,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015221974346786738,
"signal/frontier_coverage_25/weight": 0.01965273208916187,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015221974346786738,
"signal/frontier_coverage_5/centered_abs_mean": 0.17897019982337953,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85,
"signal/frontier_coverage_5/group_std_mean": 0.2304094761610031,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003422551741823554,
"signal/frontier_coverage_5/weight": 0.01912260763347149,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003422551741823554,
"signal/frontier_ece_reward/centered_abs_mean": 0.007032676786184311,
"signal/frontier_ece_reward/group_bin_occupancy": 0.715625,
"signal/frontier_ece_reward/group_std_mean": 0.00889311209321022,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010449515888467432,
"signal/frontier_ece_reward/weight": 0.14858417212963104,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010449515888467432,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07925714552402496,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.56171875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09801054894924163,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01367063745856285,
"signal/frontier_entropy_batch_reward/weight": 0.172488734126091,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01367063745856285,
"step": 225
},
{
"adaptive_ema/accuracy_reward": 0.48034190133964305,
"adaptive_ema/brier_reward": 0.7311690908832558,
"adaptive_ema/format_reward": 0.9622152208058333,
"adaptive_ema/frontier_aurc_reward": 0.03366201988413342,
"adaptive_ema/frontier_coverage_1": 0.13972719591673705,
"adaptive_ema/frontier_coverage_10": 0.13972719591673705,
"adaptive_ema/frontier_coverage_15": 0.13972719591673705,
"adaptive_ema/frontier_coverage_20": 0.13416882024286453,
"adaptive_ema/frontier_coverage_25": 0.11526060077836695,
"adaptive_ema/frontier_coverage_5": 0.13972719591673705,
"adaptive_ema/frontier_ece_reward": 0.04333311199995612,
"adaptive_ema/frontier_entropy_batch_reward": -0.10496182388959377,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04186927527189255,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02155204601585865,
"adaptive_weight/frontier_coverage_1": 0.01918649598956108,
"adaptive_weight/frontier_coverage_10": 0.01918649598956108,
"adaptive_weight/frontier_coverage_15": 0.01918649598956108,
"adaptive_weight/frontier_coverage_20": 0.019310463592410086,
"adaptive_weight/frontier_coverage_25": 0.019732169806957245,
"adaptive_weight/frontier_coverage_5": 0.01918649598956108,
"adaptive_weight/frontier_ece_reward": 0.14899688959121704,
"adaptive_weight/frontier_entropy_batch_reward": 0.17209317088127135,
"calibration/aurc": 0.2774290384797605,
"calibration/batch_distribution_entropy": 0.975751417418882,
"calibration/batch_entropy_100bins": 0.7119109716006123,
"calibration/batch_entropy_10bins": 0.975751417418882,
"calibration/batch_entropy_50bins": 0.8102837692413288,
"calibration/batch_uniqueness": 0.898162841796875,
"calibration/buffer_distribution_entropy": 0.985121896835086,
"calibration/buffer_entropy_100bins": 0.722076707653373,
"calibration/buffer_entropy_10bins": 0.985121896835086,
"calibration/buffer_entropy_50bins": 0.820123186724713,
"calibration/confidence_entropy": 0.48087078575614656,
"calibration/coverage@0%": 0.00390625,
"calibration/coverage@1%": 0.00390625,
"calibration/coverage@10%": 0.048828125,
"calibration/coverage@15%": 0.210546875,
"calibration/coverage@20%": 0.35078125,
"calibration/coverage@25%": 0.488671875,
"calibration/coverage@30%": 0.5984375,
"calibration/coverage@5%": 0.00390625,
"calibration/ece": 0.14718359374999998,
"calibration/mean_confidence": 0.5199648437500001,
"calibration/prompt_uniqueness": 0.725732421875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 692.8,
"completions/max_terminated_length": 487.6,
"completions/mean_length": 221.00791015625,
"completions/mean_terminated_length": 220.75122985839843,
"completions/min_length": 101.4,
"completions/min_terminated_length": 101.4,
"epoch": 0.736,
"grad_norm": 0.002505439566448331,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 783715303.0,
"reward": 0.8198559522628784,
"reward_std": 0.07011436522006989,
"rewards/accuracy_reward": 0.56396484375,
"rewards/brier_reward": 0.7870771884918213,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002653352613560855,
"rewards/frontier_coverage_1": 0.09510880410671234,
"rewards/frontier_coverage_10": 0.09510880410671234,
"rewards/frontier_coverage_15": 0.09510880410671234,
"rewards/frontier_coverage_20": 0.08357458561658859,
"rewards/frontier_coverage_25": 0.05747109428048134,
"rewards/frontier_coverage_5": 0.09510880410671234,
"rewards/frontier_ece_reward": 0.004429191770032048,
"rewards/frontier_entropy_batch_reward": -0.033060839772224425,
"signal/accuracy_reward/centered_abs_mean": 0.092315673828125,
"signal/accuracy_reward/group_bin_occupancy": 0.171484375,
"signal/accuracy_reward/group_std_mean": 0.1249350905418396,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0461578369140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0461578369140625,
"signal/advantage_abs_mean": 0.05301322415471077,
"signal/advantage_pre_scale_abs_mean": 0.05301322415471077,
"signal/advantage_pre_scale_std": 0.09370943009853364,
"signal/advantage_std": 0.09370943009853364,
"signal/brier_reward/centered_abs_mean": 0.13454246371984482,
"signal/brier_reward/group_bin_occupancy": 0.8171875,
"signal/brier_reward/group_std_mean": 0.17263633012771606,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0056332145817577835,
"signal/brier_reward/weight": 0.04186927527189255,
"signal/brier_reward/weighted_centered_abs_mean": 0.0056332145817577835,
"signal/format_reward/centered_abs_mean": 0.0003662109375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.000768545875325799,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00018310546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025338149163872,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.671484375,
"signal/frontier_aurc_reward/group_std_mean": 0.004274234082549811,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4615944827673955e-05,
"signal/frontier_aurc_reward/weight": 0.02155204601585865,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4615944827673955e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1820545345544815,
"signal/frontier_coverage_1/group_bin_occupancy": 0.837109375,
"signal/frontier_coverage_1/group_std_mean": 0.23294652104377747,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034930593799799682,
"signal/frontier_coverage_1/weight": 0.01918649598956108,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034930593799799682,
"signal/frontier_coverage_10/centered_abs_mean": 0.1820545345544815,
"signal/frontier_coverage_10/group_bin_occupancy": 0.837109375,
"signal/frontier_coverage_10/group_std_mean": 0.23294652104377747,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034930593799799682,
"signal/frontier_coverage_10/weight": 0.01918649598956108,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034930593799799682,
"signal/frontier_coverage_15/centered_abs_mean": 0.1820545345544815,
"signal/frontier_coverage_15/group_bin_occupancy": 0.837109375,
"signal/frontier_coverage_15/group_std_mean": 0.23294652104377747,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034930593799799682,
"signal/frontier_coverage_15/weight": 0.01918649598956108,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034930593799799682,
"signal/frontier_coverage_20/centered_abs_mean": 0.13491614758968354,
"signal/frontier_coverage_20/group_bin_occupancy": 0.825390625,
"signal/frontier_coverage_20/group_std_mean": 0.17339130043983458,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026054022833704948,
"signal/frontier_coverage_20/weight": 0.019310463592410086,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026054022833704948,
"signal/frontier_coverage_25/centered_abs_mean": 0.07765587270259858,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8625,
"signal/frontier_coverage_25/group_std_mean": 0.1002039521932602,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015323557192459703,
"signal/frontier_coverage_25/weight": 0.019732169806957245,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015323557192459703,
"signal/frontier_coverage_5/centered_abs_mean": 0.1820545345544815,
"signal/frontier_coverage_5/group_bin_occupancy": 0.837109375,
"signal/frontier_coverage_5/group_std_mean": 0.23294652104377747,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034930593799799682,
"signal/frontier_coverage_5/weight": 0.01918649598956108,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034930593799799682,
"signal/frontier_ece_reward/centered_abs_mean": 0.006834933627396822,
"signal/frontier_ece_reward/group_bin_occupancy": 0.71484375,
"signal/frontier_ece_reward/group_std_mean": 0.00876548495143652,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010184106649830937,
"signal/frontier_ece_reward/weight": 0.14899688959121704,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010184106649830937,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06465991437435151,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.524609375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.08424876034259796,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011128241941332818,
"signal/frontier_entropy_batch_reward/weight": 0.17209317088127135,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011128241941332818,
"step": 230
},
{
"adaptive_ema/accuracy_reward": 0.4846319086158449,
"adaptive_ema/brier_reward": 0.7338147443526692,
"adaptive_ema/format_reward": 0.9640594318178,
"adaptive_ema/frontier_aurc_reward": 0.031872542136557096,
"adaptive_ema/frontier_coverage_1": 0.1375201093728598,
"adaptive_ema/frontier_coverage_10": 0.1375201093728598,
"adaptive_ema/frontier_coverage_15": 0.1375201093728598,
"adaptive_ema/frontier_coverage_20": 0.13135532068478054,
"adaptive_ema/frontier_coverage_25": 0.11224977167595156,
"adaptive_ema/frontier_coverage_5": 0.1375201093728598,
"adaptive_ema/frontier_ece_reward": 0.0414305952665723,
"adaptive_ema/frontier_entropy_batch_reward": -0.10191647726630064,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.041475728154182434,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02160160094499588,
"adaptive_weight/frontier_coverage_1": 0.01924430951476097,
"adaptive_weight/frontier_coverage_10": 0.01924430951476097,
"adaptive_weight/frontier_coverage_15": 0.01924430951476097,
"adaptive_weight/frontier_coverage_20": 0.019381864368915556,
"adaptive_weight/frontier_coverage_25": 0.01980816349387169,
"adaptive_weight/frontier_coverage_5": 0.01924430951476097,
"adaptive_weight/frontier_ece_reward": 0.14935987889766694,
"adaptive_weight/frontier_entropy_batch_reward": 0.17169552445411682,
"calibration/aurc": 0.2963728152958468,
"calibration/batch_distribution_entropy": 0.9633717819728546,
"calibration/batch_entropy_100bins": 0.6890503821265225,
"calibration/batch_entropy_10bins": 0.9633717819728546,
"calibration/batch_entropy_50bins": 0.7882439531361687,
"calibration/batch_uniqueness": 0.8822235107421875,
"calibration/buffer_distribution_entropy": 0.9859966226633015,
"calibration/buffer_entropy_100bins": 0.7234582418960763,
"calibration/buffer_entropy_10bins": 0.9859966226633015,
"calibration/buffer_entropy_50bins": 0.821087380334359,
"calibration/confidence_entropy": 0.4473816058373828,
"calibration/coverage@0%": 0.011328125,
"calibration/coverage@1%": 0.011328125,
"calibration/coverage@10%": 0.076171875,
"calibration/coverage@15%": 0.18984375,
"calibration/coverage@20%": 0.2984375,
"calibration/coverage@25%": 0.4109375,
"calibration/coverage@30%": 0.56875,
"calibration/coverage@5%": 0.011328125,
"calibration/ece": 0.12555078125000002,
"calibration/mean_confidence": 0.45226953124999997,
"calibration/prompt_uniqueness": 0.702001953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 449.6,
"completions/max_terminated_length": 449.6,
"completions/mean_length": 215.694140625,
"completions/mean_terminated_length": 215.694140625,
"completions/min_length": 102.4,
"completions/min_terminated_length": 102.4,
"epoch": 0.752,
"grad_norm": 0.0009011356742121279,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 801151211.0,
"reward": 0.8150582075119018,
"reward_std": 0.0696170151233673,
"rewards/accuracy_reward": 0.55830078125,
"rewards/brier_reward": 0.791687285900116,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.003065359219908714,
"rewards/frontier_coverage_1": 0.1088890254497528,
"rewards/frontier_coverage_10": 0.1088890254497528,
"rewards/frontier_coverage_15": 0.1088890254497528,
"rewards/frontier_coverage_20": 0.08239309936761856,
"rewards/frontier_coverage_25": 0.05571244210004807,
"rewards/frontier_coverage_5": 0.1088890254497528,
"rewards/frontier_ece_reward": 0.004906528582796455,
"rewards/frontier_entropy_batch_reward": -0.05090191811323166,
"signal/accuracy_reward/centered_abs_mean": 0.088250732421875,
"signal/accuracy_reward/group_bin_occupancy": 0.16796875,
"signal/accuracy_reward/group_std_mean": 0.11853147447109222,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441253662109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0441253662109375,
"signal/advantage_abs_mean": 0.05262523740530014,
"signal/advantage_pre_scale_abs_mean": 0.05262523740530014,
"signal/advantage_pre_scale_std": 0.09439008533954621,
"signal/advantage_std": 0.09439008533954621,
"signal/brier_reward/centered_abs_mean": 0.12752344012260436,
"signal/brier_reward/group_bin_occupancy": 0.80859375,
"signal/brier_reward/group_std_mean": 0.16469366252422332,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005290476325899363,
"signal/brier_reward/weight": 0.041475728154182434,
"signal/brier_reward/weighted_centered_abs_mean": 0.005290476325899363,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002861019968986511,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.66484375,
"signal/frontier_aurc_reward/group_std_mean": 0.00461784191429615,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.180902564665303e-05,
"signal/frontier_aurc_reward/weight": 0.02160160094499588,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.180902564665303e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1756511449813843,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_1/group_std_mean": 0.2248201698064804,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033802258782088756,
"signal/frontier_coverage_1/weight": 0.01924430951476097,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033802258782088756,
"signal/frontier_coverage_10/centered_abs_mean": 0.1756511449813843,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_10/group_std_mean": 0.2248201698064804,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033802258782088756,
"signal/frontier_coverage_10/weight": 0.01924430951476097,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033802258782088756,
"signal/frontier_coverage_15/centered_abs_mean": 0.1756511449813843,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_15/group_std_mean": 0.2248201698064804,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033802258782088756,
"signal/frontier_coverage_15/weight": 0.01924430951476097,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033802258782088756,
"signal/frontier_coverage_20/centered_abs_mean": 0.12954683005809783,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8234375,
"signal/frontier_coverage_20/group_std_mean": 0.16692675650119781,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002510684449225664,
"signal/frontier_coverage_20/weight": 0.019381864368915556,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002510684449225664,
"signal/frontier_coverage_25/centered_abs_mean": 0.07408646047115326,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_25/group_std_mean": 0.09557124227285385,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014674163889139892,
"signal/frontier_coverage_25/weight": 0.01980816349387169,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014674163889139892,
"signal/frontier_coverage_5/centered_abs_mean": 0.1756511449813843,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_5/group_std_mean": 0.2248201698064804,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033802258782088756,
"signal/frontier_coverage_5/weight": 0.01924430951476097,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033802258782088756,
"signal/frontier_ece_reward/centered_abs_mean": 0.006796565931290388,
"signal/frontier_ece_reward/group_bin_occupancy": 0.709765625,
"signal/frontier_ece_reward/group_std_mean": 0.00861854236572981,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010151177062653004,
"signal/frontier_ece_reward/weight": 0.14935987889766694,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010151177062653004,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08200211226940154,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.54140625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10221528112888337,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01407961007207632,
"signal/frontier_entropy_batch_reward/weight": 0.17169552445411682,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01407961007207632,
"step": 235
},
{
"adaptive_ema/accuracy_reward": 0.4862951098712302,
"adaptive_ema/brier_reward": 0.7370724491017904,
"adaptive_ema/format_reward": 0.9658179767826939,
"adaptive_ema/frontier_aurc_reward": 0.030162166301981302,
"adaptive_ema/frontier_coverage_1": 0.1377375083668309,
"adaptive_ema/frontier_coverage_10": 0.1377375083668309,
"adaptive_ema/frontier_coverage_15": 0.1377375083668309,
"adaptive_ema/frontier_coverage_20": 0.13041548891361807,
"adaptive_ema/frontier_coverage_25": 0.11005992251250514,
"adaptive_ema/frontier_coverage_5": 0.1377375083668309,
"adaptive_ema/frontier_ece_reward": 0.03967249820244284,
"adaptive_ema/frontier_entropy_batch_reward": -0.10005694398465499,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.0410037562251091,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02165858820080757,
"adaptive_weight/frontier_coverage_1": 0.019256194308400153,
"adaptive_weight/frontier_coverage_10": 0.019256194308400153,
"adaptive_weight/frontier_coverage_15": 0.019256194308400153,
"adaptive_weight/frontier_coverage_20": 0.0194197129458189,
"adaptive_weight/frontier_coverage_25": 0.019874298945069313,
"adaptive_weight/frontier_coverage_5": 0.019256194308400153,
"adaptive_weight/frontier_ece_reward": 0.14976398646831512,
"adaptive_weight/frontier_entropy_batch_reward": 0.17155487537384034,
"calibration/aurc": 0.2890387688744652,
"calibration/batch_distribution_entropy": 0.9747367306286249,
"calibration/batch_entropy_100bins": 0.7005247252075868,
"calibration/batch_entropy_10bins": 0.9747367306286249,
"calibration/batch_entropy_50bins": 0.8037471501457052,
"calibration/batch_uniqueness": 0.896148681640625,
"calibration/buffer_distribution_entropy": 0.9864059528029558,
"calibration/buffer_entropy_100bins": 0.723835422585013,
"calibration/buffer_entropy_10bins": 0.9864059528029558,
"calibration/buffer_entropy_50bins": 0.8212176638152784,
"calibration/confidence_entropy": 0.4836140466275616,
"calibration/coverage@0%": 0.020703125,
"calibration/coverage@1%": 0.020703125,
"calibration/coverage@10%": 0.23359375,
"calibration/coverage@15%": 0.274609375,
"calibration/coverage@20%": 0.38359375,
"calibration/coverage@25%": 0.43828125,
"calibration/coverage@30%": 0.51171875,
"calibration/coverage@5%": 0.152734375,
"calibration/ece": 0.16974999999999998,
"calibration/mean_confidence": 0.45348437500000005,
"calibration/prompt_uniqueness": 0.75390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 472.0,
"completions/max_terminated_length": 472.0,
"completions/mean_length": 220.0798828125,
"completions/mean_terminated_length": 220.0798828125,
"completions/min_length": 95.4,
"completions/min_terminated_length": 95.4,
"epoch": 0.768,
"grad_norm": 0.0007895245798863471,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 818337533.0,
"reward": 0.7931086540222168,
"reward_std": 0.07328220456838608,
"rewards/accuracy_reward": 0.51162109375,
"rewards/brier_reward": 0.8012859940528869,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002820562291890383,
"rewards/frontier_coverage_1": 0.1454554319381714,
"rewards/frontier_coverage_10": 0.1454554319381714,
"rewards/frontier_coverage_15": 0.1454554319381714,
"rewards/frontier_coverage_20": 0.11572380065917968,
"rewards/frontier_coverage_25": 0.0690992683172226,
"rewards/frontier_coverage_5": 0.1454554319381714,
"rewards/frontier_ece_reward": 0.00546963894739747,
"rewards/frontier_entropy_batch_reward": -0.06505972109735011,
"signal/accuracy_reward/centered_abs_mean": 0.091497802734375,
"signal/accuracy_reward/group_bin_occupancy": 0.16953125,
"signal/accuracy_reward/group_std_mean": 0.12290232628583908,
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0457489013671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0457489013671875,
"signal/advantage_abs_mean": 0.0559655487537384,
"signal/advantage_pre_scale_abs_mean": 0.0559655487537384,
"signal/advantage_pre_scale_std": 0.09721155613660812,
"signal/advantage_std": 0.09721155613660812,
"signal/brier_reward/centered_abs_mean": 0.1295279458165169,
"signal/brier_reward/group_bin_occupancy": 0.809765625,
"signal/brier_reward/group_std_mean": 0.1659324049949646,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005311077646911144,
"signal/brier_reward/weight": 0.0410037562251091,
"signal/brier_reward/weighted_centered_abs_mean": 0.005311077646911144,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026746280957013367,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.673828125,
"signal/frontier_aurc_reward/group_std_mean": 0.004395715426653624,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7932342315325513e-05,
"signal/frontier_aurc_reward/weight": 0.02165858820080757,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7932342315325513e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18093566000461578,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_1/group_std_mean": 0.23029330968856812,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034840914886444807,
"signal/frontier_coverage_1/weight": 0.019256194308400153,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034840914886444807,
"signal/frontier_coverage_10/centered_abs_mean": 0.18093566000461578,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_10/group_std_mean": 0.23029330968856812,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034840914886444807,
"signal/frontier_coverage_10/weight": 0.019256194308400153,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034840914886444807,
"signal/frontier_coverage_15/centered_abs_mean": 0.18093566000461578,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_15/group_std_mean": 0.23029330968856812,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034840914886444807,
"signal/frontier_coverage_15/weight": 0.019256194308400153,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034840914886444807,
"signal/frontier_coverage_20/centered_abs_mean": 0.13377536982297897,
"signal/frontier_coverage_20/group_bin_occupancy": 0.844140625,
"signal/frontier_coverage_20/group_std_mean": 0.17080979347229003,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025977870915085076,
"signal/frontier_coverage_20/weight": 0.0194197129458189,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025977870915085076,
"signal/frontier_coverage_25/centered_abs_mean": 0.07524611651897431,
"signal/frontier_coverage_25/group_bin_occupancy": 0.878515625,
"signal/frontier_coverage_25/group_std_mean": 0.09601740390062333,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014954280573874711,
"signal/frontier_coverage_25/weight": 0.019874298945069313,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014954280573874711,
"signal/frontier_coverage_5/centered_abs_mean": 0.18093566000461578,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_5/group_std_mean": 0.23029330968856812,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034840914886444807,
"signal/frontier_coverage_5/weight": 0.019256194308400153,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034840914886444807,
"signal/frontier_ece_reward/centered_abs_mean": 0.00658888490870595,
"signal/frontier_ece_reward/group_bin_occupancy": 0.721875,
"signal/frontier_ece_reward/group_std_mean": 0.008340070582926273,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000986759876832366,
"signal/frontier_ece_reward/weight": 0.14976398646831512,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000986759876832366,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09385951161384583,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.545703125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.11752689033746719,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01610282640904188,
"signal/frontier_entropy_batch_reward/weight": 0.17155487537384034,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01610282640904188,
"step": 240
},
{
"adaptive_ema/accuracy_reward": 0.48961167774338754,
"adaptive_ema/brier_reward": 0.7395506917762349,
"adaptive_ema/format_reward": 0.967481739658895,
"adaptive_ema/frontier_aurc_reward": 0.02854741451555499,
"adaptive_ema/frontier_coverage_1": 0.1359473757242809,
"adaptive_ema/frontier_coverage_10": 0.1359473757242809,
"adaptive_ema/frontier_coverage_15": 0.1359473757242809,
"adaptive_ema/frontier_coverage_20": 0.1279098750512469,
"adaptive_ema/frontier_coverage_25": 0.10728999219979576,
"adaptive_ema/frontier_coverage_5": 0.1359473757242809,
"adaptive_ema/frontier_ece_reward": 0.037937546839066597,
"adaptive_ema/frontier_entropy_batch_reward": -0.09750663975242849,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.040633540600538254,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021703314781188966,
"adaptive_weight/frontier_coverage_1": 0.019303881376981736,
"adaptive_weight/frontier_coverage_10": 0.019303881376981736,
"adaptive_weight/frontier_coverage_15": 0.019303881376981736,
"adaptive_weight/frontier_coverage_20": 0.019483447819948197,
"adaptive_weight/frontier_coverage_25": 0.019944119080901147,
"adaptive_weight/frontier_coverage_5": 0.019303881376981736,
"adaptive_weight/frontier_ece_reward": 0.15009447634220124,
"adaptive_weight/frontier_entropy_batch_reward": 0.17122556865215302,
"calibration/aurc": 0.33736045830547284,
"calibration/batch_distribution_entropy": 0.9776407299368929,
"calibration/batch_entropy_100bins": 0.7138942156860587,
"calibration/batch_entropy_10bins": 0.9776407299368929,
"calibration/batch_entropy_50bins": 0.8104821318344806,
"calibration/batch_uniqueness": 0.895302625944048,
"calibration/buffer_distribution_entropy": 0.9869873126911267,
"calibration/buffer_entropy_100bins": 0.7250785459370659,
"calibration/buffer_entropy_10bins": 0.9869873126911267,
"calibration/buffer_entropy_50bins": 0.8220233233940262,
"calibration/confidence_entropy": 0.4486780646244971,
"calibration/coverage@0%": 0.02813340875733855,
"calibration/coverage@1%": 0.02813340875733855,
"calibration/coverage@10%": 0.14844590875733857,
"calibration/coverage@15%": 0.23293939579256362,
"calibration/coverage@20%": 0.2794749877690802,
"calibration/coverage@25%": 0.3134937622309198,
"calibration/coverage@30%": 0.43190970523483363,
"calibration/coverage@5%": 0.12110215875733857,
"calibration/ece": 0.16516171110567518,
"calibration/mean_confidence": 0.48722826259784735,
"calibration/prompt_uniqueness": 0.7043301553557493,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1123.0,
"completions/max_terminated_length": 609.0,
"completions/mean_length": 223.16259765625,
"completions/mean_terminated_length": 222.7779998779297,
"completions/min_length": 105.4,
"completions/min_terminated_length": 105.4,
"epoch": 0.784,
"grad_norm": 0.0007308553322218359,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 835797086.0,
"reward": 0.8105384111404419,
"reward_std": 0.07616954743862152,
"rewards/accuracy_reward": 0.55625,
"rewards/brier_reward": 0.7731986522674561,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.00297453529201448,
"rewards/frontier_coverage_1": 0.08737537786364555,
"rewards/frontier_coverage_10": 0.08737537786364555,
"rewards/frontier_coverage_15": 0.08737537786364555,
"rewards/frontier_coverage_20": 0.06674087345600128,
"rewards/frontier_coverage_25": 0.04688483104109764,
"rewards/frontier_coverage_5": 0.08737537786364555,
"rewards/frontier_ece_reward": 0.003793169092386961,
"rewards/frontier_entropy_batch_reward": -0.04895992577075958,
"signal/accuracy_reward/centered_abs_mean": 0.1007080078125,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.13696602880954742,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05035400390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05035400390625,
"signal/advantage_abs_mean": 0.057179969549179074,
"signal/advantage_pre_scale_abs_mean": 0.057179969549179074,
"signal/advantage_pre_scale_std": 0.09933086186647415,
"signal/advantage_std": 0.09933086186647415,
"signal/brier_reward/centered_abs_mean": 0.1394079804420471,
"signal/brier_reward/group_bin_occupancy": 0.819921875,
"signal/brier_reward/group_std_mean": 0.17940678894519807,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005664252396672964,
"signal/brier_reward/weight": 0.040633540600538254,
"signal/brier_reward/weighted_centered_abs_mean": 0.005664252396672964,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028895474504679443,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.673828125,
"signal/frontier_aurc_reward/group_std_mean": 0.004819400142878294,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.271691308938898e-05,
"signal/frontier_aurc_reward/weight": 0.021703314781188966,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.271691308938898e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18579940497875214,
"signal/frontier_coverage_1/group_bin_occupancy": 0.84453125,
"signal/frontier_coverage_1/group_std_mean": 0.24057165384292603,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035869122948497534,
"signal/frontier_coverage_1/weight": 0.019303881376981736,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035869122948497534,
"signal/frontier_coverage_10/centered_abs_mean": 0.18579940497875214,
"signal/frontier_coverage_10/group_bin_occupancy": 0.84453125,
"signal/frontier_coverage_10/group_std_mean": 0.24057165384292603,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035869122948497534,
"signal/frontier_coverage_10/weight": 0.019303881376981736,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035869122948497534,
"signal/frontier_coverage_15/centered_abs_mean": 0.18579940497875214,
"signal/frontier_coverage_15/group_bin_occupancy": 0.84453125,
"signal/frontier_coverage_15/group_std_mean": 0.24057165384292603,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035869122948497534,
"signal/frontier_coverage_15/weight": 0.019303881376981736,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035869122948497534,
"signal/frontier_coverage_20/centered_abs_mean": 0.12399939149618149,
"signal/frontier_coverage_20/group_bin_occupancy": 0.836328125,
"signal/frontier_coverage_20/group_std_mean": 0.16162908375263213,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002416027104482055,
"signal/frontier_coverage_20/weight": 0.019483447819948197,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002416027104482055,
"signal/frontier_coverage_25/centered_abs_mean": 0.07161483764648438,
"signal/frontier_coverage_25/group_bin_occupancy": 0.884765625,
"signal/frontier_coverage_25/group_std_mean": 0.09258138835430145,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001428340864367783,
"signal/frontier_coverage_25/weight": 0.019944119080901147,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001428340864367783,
"signal/frontier_coverage_5/centered_abs_mean": 0.18579940497875214,
"signal/frontier_coverage_5/group_bin_occupancy": 0.84453125,
"signal/frontier_coverage_5/group_std_mean": 0.24057165384292603,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035869122948497534,
"signal/frontier_coverage_5/weight": 0.019303881376981736,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035869122948497534,
"signal/frontier_ece_reward/centered_abs_mean": 0.006644812785089016,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7359375,
"signal/frontier_ece_reward/group_std_mean": 0.00850515179336071,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009973590495064855,
"signal/frontier_ece_reward/weight": 0.15009447634220124,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009973590495064855,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07034566476941109,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.541796875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.08720882162451744,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012043842859566212,
"signal/frontier_entropy_batch_reward/weight": 0.17122556865215302,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012043842859566212,
"step": 245
},
{
"adaptive_ema/accuracy_reward": 0.49278542702458816,
"adaptive_ema/brier_reward": 0.7413502303549515,
"adaptive_ema/format_reward": 0.9690697337655709,
"adaptive_ema/frontier_aurc_reward": 0.02698703182868343,
"adaptive_ema/frontier_coverage_1": 0.13391419914220162,
"adaptive_ema/frontier_coverage_10": 0.13391419914220162,
"adaptive_ema/frontier_coverage_15": 0.1339120971969206,
"adaptive_ema/frontier_coverage_20": 0.1251032662017515,
"adaptive_ema/frontier_coverage_25": 0.10439676368024298,
"adaptive_ema/frontier_coverage_5": 0.13391419914220162,
"adaptive_ema/frontier_ece_reward": 0.0362690753632365,
"adaptive_ema/frontier_entropy_batch_reward": -0.09606711071792928,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.040344792604446414,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021733880043029785,
"adaptive_weight/frontier_coverage_1": 0.01934548281133175,
"adaptive_weight/frontier_coverage_10": 0.01934548281133175,
"adaptive_weight/frontier_coverage_15": 0.019345529749989508,
"adaptive_weight/frontier_coverage_20": 0.019542290642857552,
"adaptive_weight/frontier_coverage_25": 0.02000480554997921,
"adaptive_weight/frontier_coverage_5": 0.01934548281133175,
"adaptive_weight/frontier_ece_reward": 0.1503250777721405,
"adaptive_weight/frontier_entropy_batch_reward": 0.1709671676158905,
"calibration/aurc": 0.23386092363351443,
"calibration/batch_distribution_entropy": 0.9667728581976904,
"calibration/batch_entropy_100bins": 0.7156706087414253,
"calibration/batch_entropy_10bins": 0.9667728581976904,
"calibration/batch_entropy_50bins": 0.8090857984717517,
"calibration/batch_uniqueness": 0.8946533203125,
"calibration/buffer_distribution_entropy": 0.9875890271396454,
"calibration/buffer_entropy_100bins": 0.7263351886603608,
"calibration/buffer_entropy_10bins": 0.9875890271396454,
"calibration/buffer_entropy_50bins": 0.8227308342238109,
"calibration/confidence_entropy": 0.45589743198968674,
"calibration/coverage@0%": 0.041796875,
"calibration/coverage@1%": 0.041796875,
"calibration/coverage@10%": 0.110546875,
"calibration/coverage@15%": 0.399609375,
"calibration/coverage@20%": 0.521484375,
"calibration/coverage@25%": 0.60390625,
"calibration/coverage@30%": 0.683984375,
"calibration/coverage@5%": 0.041796875,
"calibration/ece": 0.1378772265625,
"calibration/mean_confidence": 0.46403683593749995,
"calibration/prompt_uniqueness": 0.7259765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 498.6,
"completions/max_terminated_length": 498.6,
"completions/mean_length": 226.61142578125,
"completions/mean_terminated_length": 226.61142578125,
"completions/min_length": 109.4,
"completions/min_terminated_length": 109.4,
"epoch": 0.8,
"grad_norm": 0.001016717404127121,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 853128147.0,
"reward": 0.8214071750640869,
"reward_std": 0.07228949069976806,
"rewards/accuracy_reward": 0.58154296875,
"rewards/brier_reward": 0.7869839787483215,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0029516459442675115,
"rewards/frontier_coverage_1": 0.08761567920446396,
"rewards/frontier_coverage_10": 0.08761567920446396,
"rewards/frontier_coverage_15": 0.08751005381345749,
"rewards/frontier_coverage_20": 0.06634584963321685,
"rewards/frontier_coverage_25": 0.048831145837903024,
"rewards/frontier_coverage_5": 0.08761567920446396,
"rewards/frontier_ece_reward": 0.00398175586014986,
"rewards/frontier_entropy_batch_reward": -0.062575813382864,
"signal/accuracy_reward/centered_abs_mean": 0.096124267578125,
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
"signal/accuracy_reward/group_std_mean": 0.12790462523698806,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0480621337890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0480621337890625,
"signal/advantage_abs_mean": 0.05569566786289215,
"signal/advantage_pre_scale_abs_mean": 0.05569566786289215,
"signal/advantage_pre_scale_std": 0.0975670725107193,
"signal/advantage_std": 0.0975670725107193,
"signal/brier_reward/centered_abs_mean": 0.12941146790981292,
"signal/brier_reward/group_bin_occupancy": 0.819921875,
"signal/brier_reward/group_std_mean": 0.16520380973815918,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005221651773899794,
"signal/brier_reward/weight": 0.040344792604446414,
"signal/brier_reward/weighted_centered_abs_mean": 0.005221651773899794,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027469987981021404,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.68671875,
"signal/frontier_aurc_reward/group_std_mean": 0.004314049286767841,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.96989098994527e-05,
"signal/frontier_aurc_reward/weight": 0.021733880043029785,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.96989098994527e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.178289532661438,
"signal/frontier_coverage_1/group_bin_occupancy": 0.844140625,
"signal/frontier_coverage_1/group_std_mean": 0.22912515997886657,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034491011407226323,
"signal/frontier_coverage_1/weight": 0.01934548281133175,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034491011407226323,
"signal/frontier_coverage_10/centered_abs_mean": 0.178289532661438,
"signal/frontier_coverage_10/group_bin_occupancy": 0.844140625,
"signal/frontier_coverage_10/group_std_mean": 0.22912515997886657,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034491011407226323,
"signal/frontier_coverage_10/weight": 0.01934548281133175,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034491011407226323,
"signal/frontier_coverage_15/centered_abs_mean": 0.1779107928276062,
"signal/frontier_coverage_15/group_bin_occupancy": 0.84296875,
"signal/frontier_coverage_15/group_std_mean": 0.22861688137054442,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034417792223393915,
"signal/frontier_coverage_15/weight": 0.019345529749989508,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034417792223393915,
"signal/frontier_coverage_20/centered_abs_mean": 0.11839482039213181,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8390625,
"signal/frontier_coverage_20/group_std_mean": 0.15338994562625885,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002313756477087736,
"signal/frontier_coverage_20/weight": 0.019542290642857552,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002313756477087736,
"signal/frontier_coverage_25/centered_abs_mean": 0.0669538915157318,
"signal/frontier_coverage_25/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_25/group_std_mean": 0.08613462299108506,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013393915724009275,
"signal/frontier_coverage_25/weight": 0.02000480554997921,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013393915724009275,
"signal/frontier_coverage_5/centered_abs_mean": 0.178289532661438,
"signal/frontier_coverage_5/group_bin_occupancy": 0.844140625,
"signal/frontier_coverage_5/group_std_mean": 0.22912515997886657,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034491011407226323,
"signal/frontier_coverage_5/weight": 0.01934548281133175,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034491011407226323,
"signal/frontier_ece_reward/centered_abs_mean": 0.0066254565492272375,
"signal/frontier_ece_reward/group_bin_occupancy": 0.70234375,
"signal/frontier_ece_reward/group_std_mean": 0.008377740532159806,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009959676070138812,
"signal/frontier_ece_reward/weight": 0.1503250777721405,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009959676070138812,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08768114149570465,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.535546875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10930583029985427,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014992043934762478,
"signal/frontier_entropy_batch_reward/weight": 0.1709671676158905,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014992043934762478,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.4982370503843496,
"eval_calibration/batch_distribution_entropy": 0.9075570584484239,
"eval_calibration/batch_entropy_100bins": 0.5721649164690725,
"eval_calibration/batch_entropy_10bins": 0.9075570584484239,
"eval_calibration/batch_entropy_50bins": 0.6600130307272073,
"eval_calibration/batch_uniqueness": 0.8154296875,
"eval_calibration/buffer_distribution_entropy": 0.9877852855039699,
"eval_calibration/buffer_entropy_100bins": 0.727217222230328,
"eval_calibration/buffer_entropy_10bins": 0.9877852855039699,
"eval_calibration/buffer_entropy_50bins": 0.8232864763440138,
"eval_calibration/confidence_entropy": 0.4677585964356593,
"eval_calibration/coverage@0%": 0.03125,
"eval_calibration/coverage@1%": 0.03125,
"eval_calibration/coverage@10%": 0.03125,
"eval_calibration/coverage@15%": 0.1328125,
"eval_calibration/coverage@20%": 0.1328125,
"eval_calibration/coverage@25%": 0.140625,
"eval_calibration/coverage@30%": 0.203125,
"eval_calibration/coverage@5%": 0.03125,
"eval_calibration/ece": 0.169453125,
"eval_calibration/mean_confidence": 0.41960937500000006,
"eval_calibration/prompt_uniqueness": 0.8154296875,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 683.25,
"eval_completions/max_terminated_length": 399.25,
"eval_completions/mean_length": 233.7618522644043,
"eval_completions/mean_terminated_length": 231.1970443725586,
"eval_completions/min_length": 132.0,
"eval_completions/min_terminated_length": 132.0,
"eval_loss": 0.0,
"eval_num_tokens": 853128147.0,
"eval_reward": 0.7381985783576965,
"eval_reward_std": 0.2331293225288391,
"eval_rewards/accuracy_reward": 0.443359375,
"eval_rewards/brier_reward": 0.792000949382782,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.0030543976463377476,
"eval_rewards/frontier_coverage_1": 0.18230951577425003,
"eval_rewards/frontier_coverage_10": 0.18230951577425003,
"eval_rewards/frontier_coverage_15": 0.18006108701229095,
"eval_rewards/frontier_coverage_20": 0.12108920887112617,
"eval_rewards/frontier_coverage_25": 0.06418344844132662,
"eval_rewards/frontier_coverage_5": 0.18230951577425003,
"eval_rewards/frontier_ece_reward": 0.005655413027852774,
"eval_rewards/frontier_entropy_batch_reward": -0.19229092076420784,
"eval_runtime": 30.1689,
"eval_samples_per_second": 16.573,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4793701171875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49709299951791763,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23968505859375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23968505859375,
"eval_signal/advantage_abs_mean": 0.21164816245436668,
"eval_signal/advantage_pre_scale_abs_mean": 0.21164816245436668,
"eval_signal/advantage_pre_scale_std": 0.23054595291614532,
"eval_signal/advantage_std": 0.23054595291614532,
"eval_signal/brier_reward/centered_abs_mean": 0.18942880630493164,
"eval_signal/brier_reward/group_bin_occupancy": 0.8125,
"eval_signal/brier_reward/group_std_mean": 0.24447643756866455,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.007612279732711613,
"eval_signal/brier_reward/weight": 0.040185440331697464,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.007612279732711613,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_bin_occupancy": 0.1328125,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0036958245909772813,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6796875,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006954669952392578,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.03900147730019e-05,
"eval_signal/frontier_aurc_reward/weight": 0.02175157703459263,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.03900147730019e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3917815089225769,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4760092422366142,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007587432046420872,
"eval_signal/frontier_coverage_1/weight": 0.019366487860679626,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007587432046420872,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3917815089225769,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4760092422366142,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007587432046420872,
"eval_signal/frontier_coverage_10/weight": 0.019366487860679626,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007587432046420872,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.38814665377140045,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_15/group_std_mean": 0.47172661870718,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007517083082348108,
"eval_signal/frontier_coverage_15/weight": 0.019366605207324028,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007517083082348108,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.24452262371778488,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9453125,
"eval_signal/frontier_coverage_20/group_std_mean": 0.30539170652627945,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0047855316661298275,
"eval_signal/frontier_coverage_20/weight": 0.019570916891098022,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0047855316661298275,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.10838207229971886,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.90625,
"eval_signal/frontier_coverage_25/group_std_mean": 0.14063885807991028,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021711059671361,
"eval_signal/frontier_coverage_25/weight": 0.020031966269016266,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021711059671361,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3917815089225769,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4760092422366142,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007587432046420872,
"eval_signal/frontier_coverage_5/weight": 0.019366487860679626,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007587432046420872,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.010417576879262924,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.953125,
"eval_signal/frontier_ece_reward/group_std_mean": 0.012703315122053027,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015673820744268596,
"eval_signal/frontier_ece_reward/weight": 0.1504555344581604,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015673820744268596,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28658779338002205,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5625,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3573639839887619,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04896022565662861,
"eval_signal/frontier_entropy_batch_reward/weight": 0.17083849012851715,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04896022565662861,
"eval_steps_per_second": 0.133,
"step": 250
},
{
"adaptive_ema/accuracy_reward": 0.49717680829476807,
"adaptive_ema/brier_reward": 0.7433735015687879,
"adaptive_ema/format_reward": 0.9705789145165795,
"adaptive_ema/frontier_aurc_reward": 0.025524089397445687,
"adaptive_ema/frontier_coverage_1": 0.13136228238124845,
"adaptive_ema/frontier_coverage_10": 0.13136228238124845,
"adaptive_ema/frontier_coverage_15": 0.1313538576434497,
"adaptive_ema/frontier_coverage_20": 0.12215141927429277,
"adaptive_ema/frontier_coverage_25": 0.10166368874048068,
"adaptive_ema/frontier_coverage_5": 0.13136228238124845,
"adaptive_ema/frontier_ece_reward": 0.03467876901782855,
"adaptive_ema/frontier_entropy_batch_reward": -0.09387897375412108,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.04003090411424637,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021767470613121988,
"adaptive_weight/frontier_coverage_1": 0.01940329633653164,
"adaptive_weight/frontier_coverage_10": 0.01940329633653164,
"adaptive_weight/frontier_coverage_15": 0.019403484463691712,
"adaptive_weight/frontier_coverage_20": 0.01960904449224472,
"adaptive_weight/frontier_coverage_25": 0.02006669230759144,
"adaptive_weight/frontier_coverage_5": 0.01940329633653164,
"adaptive_weight/frontier_ece_reward": 0.15057945251464844,
"adaptive_weight/frontier_entropy_batch_reward": 0.17063305974006654,
"calibration/aurc": 0.2760196631448222,
"calibration/batch_distribution_entropy": 0.9658467385414122,
"calibration/batch_entropy_100bins": 0.731850484679636,
"calibration/batch_entropy_10bins": 0.9658467385414122,
"calibration/batch_entropy_50bins": 0.8224371996463742,
"calibration/batch_uniqueness": 0.8936126708984375,
"calibration/buffer_distribution_entropy": 0.9880820657401277,
"calibration/buffer_entropy_100bins": 0.7282793861138731,
"calibration/buffer_entropy_10bins": 0.9880820657401277,
"calibration/buffer_entropy_50bins": 0.8240600011509466,
"calibration/confidence_entropy": 0.45068693737141163,
"calibration/coverage@0%": 0.00703125,
"calibration/coverage@1%": 0.00703125,
"calibration/coverage@10%": 0.070703125,
"calibration/coverage@15%": 0.14296875,
"calibration/coverage@20%": 0.261328125,
"calibration/coverage@25%": 0.344921875,
"calibration/coverage@30%": 0.680078125,
"calibration/coverage@5%": 0.059375,
"calibration/ece": 0.1727737759491081,
"calibration/mean_confidence": 0.48958559905089194,
"calibration/prompt_uniqueness": 0.7189453125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 708.8,
"completions/max_terminated_length": 708.8,
"completions/mean_length": 227.3673828125,
"completions/mean_terminated_length": 227.3673828125,
"completions/min_length": 104.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.816,
"grad_norm": 0.0008266636286862195,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 870555557.0,
"reward": 0.8199009537696839,
"reward_std": 0.07451344430446624,
"rewards/accuracy_reward": 0.57900390625,
"rewards/brier_reward": 0.7676302909851074,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0031252800021320582,
"rewards/frontier_coverage_1": 0.06562656462192536,
"rewards/frontier_coverage_10": 0.06562656462192536,
"rewards/frontier_coverage_15": 0.06530472338199615,
"rewards/frontier_coverage_20": 0.05655328780412674,
"rewards/frontier_coverage_25": 0.04255493320524693,
"rewards/frontier_coverage_5": 0.06562656462192536,
"rewards/frontier_ece_reward": 0.0030927245039492845,
"rewards/frontier_entropy_batch_reward": -0.04541266113519669,
"signal/accuracy_reward/centered_abs_mean": 0.103240966796875,
"signal/accuracy_reward/group_bin_occupancy": 0.171484375,
"signal/accuracy_reward/group_std_mean": 0.1336445689201355,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0516204833984375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0516204833984375,
"signal/advantage_abs_mean": 0.0583218663930893,
"signal/advantage_pre_scale_abs_mean": 0.0583218663930893,
"signal/advantage_pre_scale_std": 0.10045389086008072,
"signal/advantage_std": 0.10045389086008072,
"signal/brier_reward/centered_abs_mean": 0.14593898355960847,
"signal/brier_reward/group_bin_occupancy": 0.830859375,
"signal/brier_reward/group_std_mean": 0.18419291973114013,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0058418047614395615,
"signal/brier_reward/weight": 0.04003090411424637,
"signal/brier_reward/weighted_centered_abs_mean": 0.0058418047614395615,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031489692628383636,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.668359375,
"signal/frontier_aurc_reward/group_std_mean": 0.00541405794210732,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.85463601257652e-05,
"signal/frontier_aurc_reward/weight": 0.021767470613121988,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.85463601257652e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1957257032394409,
"signal/frontier_coverage_1/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_1/group_std_mean": 0.24688530266284942,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037978495936840774,
"signal/frontier_coverage_1/weight": 0.01940329633653164,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037978495936840774,
"signal/frontier_coverage_10/centered_abs_mean": 0.1957257032394409,
"signal/frontier_coverage_10/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_10/group_std_mean": 0.24688530266284942,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037978495936840774,
"signal/frontier_coverage_10/weight": 0.01940329633653164,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037978495936840774,
"signal/frontier_coverage_15/centered_abs_mean": 0.19446902275085448,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85078125,
"signal/frontier_coverage_15/group_std_mean": 0.245290607213974,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003773502167314291,
"signal/frontier_coverage_15/weight": 0.019403484463691712,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003773502167314291,
"signal/frontier_coverage_20/centered_abs_mean": 0.1260778859257698,
"signal/frontier_coverage_20/group_bin_occupancy": 0.848046875,
"signal/frontier_coverage_20/group_std_mean": 0.1604294866323471,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024724855553358792,
"signal/frontier_coverage_20/weight": 0.01960904449224472,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024724855553358792,
"signal/frontier_coverage_25/centered_abs_mean": 0.07231247574090957,
"signal/frontier_coverage_25/group_bin_occupancy": 0.896484375,
"signal/frontier_coverage_25/group_std_mean": 0.0922024741768837,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014510968467220664,
"signal/frontier_coverage_25/weight": 0.02006669230759144,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014510968467220664,
"signal/frontier_coverage_5/centered_abs_mean": 0.1957257032394409,
"signal/frontier_coverage_5/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_5/group_std_mean": 0.24688530266284942,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037978495936840774,
"signal/frontier_coverage_5/weight": 0.01940329633653164,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037978495936840774,
"signal/frontier_ece_reward/centered_abs_mean": 0.007009109575301408,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7265625,
"signal/frontier_ece_reward/group_std_mean": 0.008824359998106957,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010554298525676132,
"signal/frontier_ece_reward/weight": 0.15057945251464844,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010554298525676132,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06629724502563476,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.538671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.08260493278503418,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011315522249788046,
"signal/frontier_entropy_batch_reward/weight": 0.17063305974006654,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011315522249788046,
"step": 255
},
{
"adaptive_ema/accuracy_reward": 0.5002342215898916,
"adaptive_ema/brier_reward": 0.7453260418626751,
"adaptive_ema/format_reward": 0.9720150777575703,
"adaptive_ema/frontier_aurc_reward": 0.0241355303840874,
"adaptive_ema/frontier_coverage_1": 0.12974147653299598,
"adaptive_ema/frontier_coverage_10": 0.12974147653299598,
"adaptive_ema/frontier_coverage_15": 0.12965426872532376,
"adaptive_ema/frontier_coverage_20": 0.119753790365455,
"adaptive_ema/frontier_coverage_25": 0.09915882452118516,
"adaptive_ema/frontier_coverage_5": 0.12974147653299598,
"adaptive_ema/frontier_ece_reward": 0.03317143645873037,
"adaptive_ema/frontier_entropy_batch_reward": -0.09209263060264991,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.03973116055130958,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021801147237420083,
"adaptive_weight/frontier_coverage_1": 0.01944187395274639,
"adaptive_weight/frontier_coverage_10": 0.01944187395274639,
"adaptive_weight/frontier_coverage_15": 0.019443822279572486,
"adaptive_weight/frontier_coverage_20": 0.019665002822875977,
"adaptive_weight/frontier_coverage_25": 0.020125101879239084,
"adaptive_weight/frontier_coverage_5": 0.01944187395274639,
"adaptive_weight/frontier_ece_reward": 0.15083298087120056,
"adaptive_weight/frontier_entropy_batch_reward": 0.17037515938282013,
"calibration/aurc": 0.31624097243406496,
"calibration/batch_distribution_entropy": 0.9582681490821494,
"calibration/batch_entropy_100bins": 0.7523830817628614,
"calibration/batch_entropy_10bins": 0.9582681490821494,
"calibration/batch_entropy_50bins": 0.8359059233335117,
"calibration/batch_uniqueness": 0.8990996762118865,
"calibration/buffer_distribution_entropy": 0.988342808364413,
"calibration/buffer_entropy_100bins": 0.7313311553469075,
"calibration/buffer_entropy_10bins": 0.988342808364413,
"calibration/buffer_entropy_50bins": 0.8260630727799582,
"calibration/confidence_entropy": 0.4638325415171778,
"calibration/coverage@0%": 0.0203125,
"calibration/coverage@1%": 0.0203125,
"calibration/coverage@10%": 0.169140625,
"calibration/coverage@15%": 0.19375,
"calibration/coverage@20%": 0.25940251956947163,
"calibration/coverage@25%": 0.3587940313111546,
"calibration/coverage@30%": 0.49090784001956944,
"calibration/coverage@5%": 0.132421875,
"calibration/ece": 0.12444036356409001,
"calibration/mean_confidence": 0.432767708842955,
"calibration/prompt_uniqueness": 0.7441850834905047,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 912.6,
"completions/max_terminated_length": 533.2,
"completions/mean_length": 233.1248046875,
"completions/mean_terminated_length": 232.87039489746093,
"completions/min_length": 105.8,
"completions/min_terminated_length": 105.8,
"epoch": 0.832,
"grad_norm": 0.0009671748848631978,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 887951107.0,
"reward": 0.8058806896209717,
"reward_std": 0.07332316190004348,
"rewards/accuracy_reward": 0.54541015625,
"rewards/brier_reward": 0.7977130174636841,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0025748027022928,
"rewards/frontier_coverage_1": 0.12267700582742691,
"rewards/frontier_coverage_10": 0.12267700582742691,
"rewards/frontier_coverage_15": 0.12098241597414017,
"rewards/frontier_coverage_20": 0.08454428240656853,
"rewards/frontier_coverage_25": 0.05629109740257263,
"rewards/frontier_coverage_5": 0.12267700582742691,
"rewards/frontier_ece_reward": 0.004667013976722955,
"rewards/frontier_entropy_batch_reward": -0.06709275171160697,
"signal/accuracy_reward/centered_abs_mean": 0.100091552734375,
"signal/accuracy_reward/group_bin_occupancy": 0.16875,
"signal/accuracy_reward/group_std_mean": 0.1287603422999382,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0500457763671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0500457763671875,
"signal/advantage_abs_mean": 0.05775661841034889,
"signal/advantage_pre_scale_abs_mean": 0.05775661841034889,
"signal/advantage_pre_scale_std": 0.10012289136648178,
"signal/advantage_std": 0.10012289136648178,
"signal/brier_reward/centered_abs_mean": 0.12413015365600585,
"signal/brier_reward/group_bin_occupancy": 0.821875,
"signal/brier_reward/group_std_mean": 0.1587320536375046,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004931798111647368,
"signal/brier_reward/weight": 0.03973116055130958,
"signal/brier_reward/weighted_centered_abs_mean": 0.004931798111647368,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024345603305846453,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.678125,
"signal/frontier_aurc_reward/group_std_mean": 0.004217228572815657,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3077179472893476e-05,
"signal/frontier_aurc_reward/weight": 0.021801147237420083,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3077179472893476e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17841730415821075,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_1/group_std_mean": 0.22835949063301086,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034687367733567953,
"signal/frontier_coverage_1/weight": 0.01944187395274639,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034687367733567953,
"signal/frontier_coverage_10/centered_abs_mean": 0.17841730415821075,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_10/group_std_mean": 0.22835949063301086,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034687367733567953,
"signal/frontier_coverage_10/weight": 0.01944187395274639,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034687367733567953,
"signal/frontier_coverage_15/centered_abs_mean": 0.17739293575286866,
"signal/frontier_coverage_15/group_bin_occupancy": 0.855078125,
"signal/frontier_coverage_15/group_std_mean": 0.22711012065410613,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00344918011687696,
"signal/frontier_coverage_15/weight": 0.019443822279572486,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00344918011687696,
"signal/frontier_coverage_20/centered_abs_mean": 0.11429814547300339,
"signal/frontier_coverage_20/group_bin_occupancy": 0.842578125,
"signal/frontier_coverage_20/group_std_mean": 0.14738841652870177,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00224763173609972,
"signal/frontier_coverage_20/weight": 0.019665002822875977,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00224763173609972,
"signal/frontier_coverage_25/centered_abs_mean": 0.06452373266220093,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_25/group_std_mean": 0.08209397196769715,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012985036475583911,
"signal/frontier_coverage_25/weight": 0.020125101879239084,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012985036475583911,
"signal/frontier_coverage_5/centered_abs_mean": 0.17841730415821075,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_5/group_std_mean": 0.22835949063301086,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034687367733567953,
"signal/frontier_coverage_5/weight": 0.01944187395274639,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034687367733567953,
"signal/frontier_ece_reward/centered_abs_mean": 0.006456979457288981,
"signal/frontier_ece_reward/group_bin_occupancy": 0.683984375,
"signal/frontier_ece_reward/group_std_mean": 0.008200454525649548,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009739116998389363,
"signal/frontier_ece_reward/weight": 0.15083298087120056,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009739116998389363,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09052571952342987,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.556640625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.11314128339290619,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015424954518675805,
"signal/frontier_entropy_batch_reward/weight": 0.17037515938282013,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015424954518675805,
"step": 260
},
{
"adaptive_ema/accuracy_reward": 0.5019849432815855,
"adaptive_ema/brier_reward": 0.7473832881442929,
"adaptive_ema/format_reward": 0.9733741927078121,
"adaptive_ema/frontier_aurc_reward": 0.022813292688545976,
"adaptive_ema/frontier_coverage_1": 0.12871053705245333,
"adaptive_ema/frontier_coverage_10": 0.12871053705245333,
"adaptive_ema/frontier_coverage_15": 0.12861132513256696,
"adaptive_ema/frontier_coverage_20": 0.11723889345214158,
"adaptive_ema/frontier_coverage_25": 0.09663794196021616,
"adaptive_ema/frontier_coverage_5": 0.12871053705245333,
"adaptive_ema/frontier_ece_reward": 0.03175659418592736,
"adaptive_ema/frontier_entropy_batch_reward": -0.08984249954012516,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.039427295327186584,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02184015288949013,
"adaptive_weight/frontier_coverage_1": 0.019473345205187797,
"adaptive_weight/frontier_coverage_10": 0.019473345205187797,
"adaptive_weight/frontier_coverage_15": 0.019475562125444414,
"adaptive_weight/frontier_coverage_20": 0.01972973793745041,
"adaptive_weight/frontier_coverage_25": 0.020190170407295226,
"adaptive_weight/frontier_coverage_5": 0.019473345205187797,
"adaptive_weight/frontier_ece_reward": 0.15111920833587647,
"adaptive_weight/frontier_entropy_batch_reward": 0.17009783387184144,
"calibration/aurc": 0.3186903471152664,
"calibration/batch_distribution_entropy": 0.9701558712644506,
"calibration/batch_entropy_100bins": 0.7717989175152062,
"calibration/batch_entropy_10bins": 0.9701558712644506,
"calibration/batch_entropy_50bins": 0.8565927875400245,
"calibration/batch_uniqueness": 0.9124298095703125,
"calibration/buffer_distribution_entropy": 0.9888908768893276,
"calibration/buffer_entropy_100bins": 0.7359311187257248,
"calibration/buffer_entropy_10bins": 0.9888908768893276,
"calibration/buffer_entropy_50bins": 0.8294183692710424,
"calibration/confidence_entropy": 0.49147967100944756,
"calibration/coverage@0%": 0.0203125,
"calibration/coverage@1%": 0.0203125,
"calibration/coverage@10%": 0.161328125,
"calibration/coverage@15%": 0.262109375,
"calibration/coverage@20%": 0.382421875,
"calibration/coverage@25%": 0.489453125,
"calibration/coverage@30%": 0.544921875,
"calibration/coverage@5%": 0.10078125,
"calibration/ece": 0.1589255205922068,
"calibration/mean_confidence": 0.49733127628279317,
"calibration/prompt_uniqueness": 0.769921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 784.2,
"completions/max_terminated_length": 601.8,
"completions/mean_length": 232.1328125,
"completions/mean_terminated_length": 232.00542602539062,
"completions/min_length": 111.8,
"completions/min_terminated_length": 111.8,
"epoch": 0.848,
"grad_norm": 0.0008154821116477251,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 905342515.0,
"reward": 0.8021765947341919,
"reward_std": 0.07231762930750847,
"rewards/accuracy_reward": 0.532421875,
"rewards/brier_reward": 0.7882626295089722,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002877800026908517,
"rewards/frontier_coverage_1": 0.11268590837717056,
"rewards/frontier_coverage_10": 0.11268590837717056,
"rewards/frontier_coverage_15": 0.11268590837717056,
"rewards/frontier_coverage_20": 0.0692408874630928,
"rewards/frontier_coverage_25": 0.04865131340920925,
"rewards/frontier_coverage_5": 0.11268590837717056,
"rewards/frontier_ece_reward": 0.004459475306794048,
"rewards/frontier_entropy_batch_reward": -0.039942212775349616,
"signal/accuracy_reward/centered_abs_mean": 0.09129638671875,
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
"signal/accuracy_reward/group_std_mean": 0.12554115206003189,
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045648193359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045648193359375,
"signal/advantage_abs_mean": 0.053890705853700635,
"signal/advantage_pre_scale_abs_mean": 0.053890705853700635,
"signal/advantage_pre_scale_std": 0.09712902307510377,
"signal/advantage_std": 0.09712902307510377,
"signal/brier_reward/centered_abs_mean": 0.12816624343395233,
"signal/brier_reward/group_bin_occupancy": 0.824609375,
"signal/brier_reward/group_std_mean": 0.1650087833404541,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005053586699068547,
"signal/brier_reward/weight": 0.039427295327186584,
"signal/brier_reward/weighted_centered_abs_mean": 0.005053586699068547,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027573672123253345,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.667578125,
"signal/frontier_aurc_reward/group_std_mean": 0.004844694584608078,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.021907247486524e-05,
"signal/frontier_aurc_reward/weight": 0.02184015288949013,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.021907247486524e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17698953449726104,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_1/group_std_mean": 0.2280345469713211,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034466007724404334,
"signal/frontier_coverage_1/weight": 0.019473345205187797,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034466007724404334,
"signal/frontier_coverage_10/centered_abs_mean": 0.17698953449726104,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_10/group_std_mean": 0.2280345469713211,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034466007724404334,
"signal/frontier_coverage_10/weight": 0.019473345205187797,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034466007724404334,
"signal/frontier_coverage_15/centered_abs_mean": 0.17698953449726104,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_15/group_std_mean": 0.2280345469713211,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003446992952376604,
"signal/frontier_coverage_15/weight": 0.019475562125444414,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003446992952376604,
"signal/frontier_coverage_20/centered_abs_mean": 0.11172761619091034,
"signal/frontier_coverage_20/group_bin_occupancy": 0.865234375,
"signal/frontier_coverage_20/group_std_mean": 0.14430533349514008,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022044140379875897,
"signal/frontier_coverage_20/weight": 0.01972973793745041,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022044140379875897,
"signal/frontier_coverage_25/centered_abs_mean": 0.06382529959082603,
"signal/frontier_coverage_25/group_bin_occupancy": 0.888671875,
"signal/frontier_coverage_25/group_std_mean": 0.08236846774816513,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001288694189861417,
"signal/frontier_coverage_25/weight": 0.020190170407295226,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001288694189861417,
"signal/frontier_coverage_5/centered_abs_mean": 0.17698953449726104,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_5/group_std_mean": 0.2280345469713211,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034466007724404334,
"signal/frontier_coverage_5/weight": 0.019473345205187797,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034466007724404334,
"signal/frontier_ece_reward/centered_abs_mean": 0.006545740459114313,
"signal/frontier_ece_reward/group_bin_occupancy": 0.701953125,
"signal/frontier_ece_reward/group_std_mean": 0.008315418288111687,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000989195634610951,
"signal/frontier_ece_reward/weight": 0.15111920833587647,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000989195634610951,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06969715096056461,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.51640625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.0891161508858204,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01185646653175354,
"signal/frontier_entropy_batch_reward/weight": 0.17009783387184144,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01185646653175354,
"step": 265
},
{
"adaptive_ema/accuracy_reward": 0.5044514748920346,
"adaptive_ema/brier_reward": 0.7493093621849761,
"adaptive_ema/format_reward": 0.9746733789377287,
"adaptive_ema/frontier_aurc_reward": 0.021553571186420806,
"adaptive_ema/frontier_coverage_1": 0.12737641092100635,
"adaptive_ema/frontier_coverage_10": 0.12737641092100635,
"adaptive_ema/frontier_coverage_15": 0.12728400434624482,
"adaptive_ema/frontier_coverage_20": 0.1148155370049325,
"adaptive_ema/frontier_coverage_25": 0.09428026537524757,
"adaptive_ema/frontier_coverage_5": 0.12737641092100635,
"adaptive_ema/frontier_ece_reward": 0.03040152289721601,
"adaptive_ema/frontier_entropy_batch_reward": -0.08760520956054822,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.039141100645065305,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02187635935842991,
"adaptive_weight/frontier_coverage_1": 0.019510345160961153,
"adaptive_weight/frontier_coverage_10": 0.019510345160961153,
"adaptive_weight/frontier_coverage_15": 0.01951241083443165,
"adaptive_weight/frontier_coverage_20": 0.019791184365749358,
"adaptive_weight/frontier_coverage_25": 0.020250317454338074,
"adaptive_weight/frontier_coverage_5": 0.019510345160961153,
"adaptive_weight/frontier_ece_reward": 0.15138642489910126,
"adaptive_weight/frontier_entropy_batch_reward": 0.16981116831302642,
"calibration/aurc": 0.31419108840785065,
"calibration/batch_distribution_entropy": 0.9680592532098515,
"calibration/batch_entropy_100bins": 0.776597155140324,
"calibration/batch_entropy_10bins": 0.9680592532098515,
"calibration/batch_entropy_50bins": 0.8547908572094611,
"calibration/batch_uniqueness": 0.91705322265625,
"calibration/buffer_distribution_entropy": 0.9894312794661053,
"calibration/buffer_entropy_100bins": 0.7407619660570571,
"calibration/buffer_entropy_10bins": 0.9894312794661053,
"calibration/buffer_entropy_50bins": 0.833047557256501,
"calibration/confidence_entropy": 0.4909109986565158,
"calibration/coverage@0%": 0.002734375,
"calibration/coverage@1%": 0.002734375,
"calibration/coverage@10%": 0.00625,
"calibration/coverage@15%": 0.070703125,
"calibration/coverage@20%": 0.18828125,
"calibration/coverage@25%": 0.34140625,
"calibration/coverage@30%": 0.471484375,
"calibration/coverage@5%": 0.002734375,
"calibration/ece": 0.14773077485521877,
"calibration/mean_confidence": 0.5650817251447812,
"calibration/prompt_uniqueness": 0.786669921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 756.6,
"completions/max_terminated_length": 570.8,
"completions/mean_length": 235.30107421875,
"completions/mean_terminated_length": 235.17388916015625,
"completions/min_length": 114.6,
"completions/min_terminated_length": 114.6,
"epoch": 0.864,
"grad_norm": 0.001250621397048235,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 922738814.0,
"reward": 0.8196427822113037,
"reward_std": 0.07560298591852188,
"rewards/accuracy_reward": 0.577734375,
"rewards/brier_reward": 0.782896625995636,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002946721948683262,
"rewards/frontier_coverage_1": 0.07985682934522628,
"rewards/frontier_coverage_10": 0.07985682934522628,
"rewards/frontier_coverage_15": 0.07995446622371674,
"rewards/frontier_coverage_20": 0.05727446302771568,
"rewards/frontier_coverage_25": 0.04549731090664864,
"rewards/frontier_coverage_5": 0.07985682934522628,
"rewards/frontier_ece_reward": 0.003623440582305193,
"rewards/frontier_entropy_batch_reward": -0.05079686343669891,
"signal/accuracy_reward/centered_abs_mean": 0.10323486328125,
"signal/accuracy_reward/group_bin_occupancy": 0.175390625,
"signal/accuracy_reward/group_std_mean": 0.13858965933322906,
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051617431640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051617431640625,
"signal/advantage_abs_mean": 0.056996123492717744,
"signal/advantage_pre_scale_abs_mean": 0.056996123492717744,
"signal/advantage_pre_scale_std": 0.100760318338871,
"signal/advantage_std": 0.100760318338871,
"signal/brier_reward/centered_abs_mean": 0.13495307713747023,
"signal/brier_reward/group_bin_occupancy": 0.8296875,
"signal/brier_reward/group_std_mean": 0.17243683338165283,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005282375589013099,
"signal/brier_reward/weight": 0.039141100645065305,
"signal/brier_reward/weighted_centered_abs_mean": 0.005282375589013099,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002958740387111902,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.670703125,
"signal/frontier_aurc_reward/group_std_mean": 0.00494068767875433,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.472636378020979e-05,
"signal/frontier_aurc_reward/weight": 0.02187635935842991,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.472636378020979e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18261632919311524,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_1/group_std_mean": 0.2336251974105835,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035628742538392545,
"signal/frontier_coverage_1/weight": 0.019510345160961153,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035628742538392545,
"signal/frontier_coverage_10/centered_abs_mean": 0.18261632919311524,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_10/group_std_mean": 0.2336251974105835,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035628742538392545,
"signal/frontier_coverage_10/weight": 0.019510345160961153,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035628742538392545,
"signal/frontier_coverage_15/centered_abs_mean": 0.1817552149295807,
"signal/frontier_coverage_15/group_bin_occupancy": 0.861328125,
"signal/frontier_coverage_15/group_std_mean": 0.23247010409832,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003546440601348877,
"signal/frontier_coverage_15/weight": 0.01951241083443165,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003546440601348877,
"signal/frontier_coverage_20/centered_abs_mean": 0.11168777346611022,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_20/group_std_mean": 0.14396534562110902,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022104350849986077,
"signal/frontier_coverage_20/weight": 0.019791184365749358,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022104350849986077,
"signal/frontier_coverage_25/centered_abs_mean": 0.06536850556731225,
"signal/frontier_coverage_25/group_bin_occupancy": 0.90234375,
"signal/frontier_coverage_25/group_std_mean": 0.08358364701271057,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013237420003861188,
"signal/frontier_coverage_25/weight": 0.020250317454338074,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013237420003861188,
"signal/frontier_coverage_5/centered_abs_mean": 0.18261632919311524,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_5/group_std_mean": 0.2336251974105835,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035628742538392545,
"signal/frontier_coverage_5/weight": 0.019510345160961153,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035628742538392545,
"signal/frontier_ece_reward/centered_abs_mean": 0.0068363240920007225,
"signal/frontier_ece_reward/group_bin_occupancy": 0.705078125,
"signal/frontier_ece_reward/group_std_mean": 0.008612703718245029,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010349399410188199,
"signal/frontier_ece_reward/weight": 0.15138642489910126,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010349399410188199,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07362207397818565,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.571484375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09382486641407013,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012501352466642857,
"signal/frontier_entropy_batch_reward/weight": 0.16981116831302642,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012501352466642857,
"step": 270
},
{
"adaptive_ema/accuracy_reward": 0.505869124740359,
"adaptive_ema/brier_reward": 0.751255920640763,
"adaptive_ema/format_reward": 0.975910806373159,
"adaptive_ema/frontier_aurc_reward": 0.02033559825264712,
"adaptive_ema/frontier_coverage_1": 0.1269209463031713,
"adaptive_ema/frontier_coverage_10": 0.1269209463031713,
"adaptive_ema/frontier_coverage_15": 0.12677453549308487,
"adaptive_ema/frontier_coverage_20": 0.11302953540707772,
"adaptive_ema/frontier_coverage_25": 0.09220718762997454,
"adaptive_ema/frontier_coverage_5": 0.1269209463031713,
"adaptive_ema/frontier_ece_reward": 0.02913881739932035,
"adaptive_ema/frontier_entropy_batch_reward": -0.08627040592132958,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.03884957581758499,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.02191058695316315,
"adaptive_weight/frontier_coverage_1": 0.019526761770248414,
"adaptive_weight/frontier_coverage_10": 0.019526761770248414,
"adaptive_weight/frontier_coverage_15": 0.019530036300420762,
"adaptive_weight/frontier_coverage_20": 0.019837450236082077,
"adaptive_weight/frontier_coverage_25": 0.02030315063893795,
"adaptive_weight/frontier_coverage_5": 0.019526761770248414,
"adaptive_weight/frontier_ece_reward": 0.1516319841146469,
"adaptive_weight/frontier_entropy_batch_reward": 0.1696569263935089,
"calibration/aurc": 0.39134423975519905,
"calibration/batch_distribution_entropy": 0.9757211224535725,
"calibration/batch_entropy_100bins": 0.7925706641499016,
"calibration/batch_entropy_10bins": 0.9757211224535725,
"calibration/batch_entropy_50bins": 0.8665987159588019,
"calibration/batch_uniqueness": 0.9180042159134094,
"calibration/buffer_distribution_entropy": 0.9899841993361905,
"calibration/buffer_entropy_100bins": 0.7451823056316547,
"calibration/buffer_entropy_10bins": 0.9899841993361905,
"calibration/buffer_entropy_50bins": 0.8362615117432455,
"calibration/confidence_entropy": 0.4627085505313879,
"calibration/coverage@0%": 0.0007827788649706457,
"calibration/coverage@1%": 0.0007827788649706457,
"calibration/coverage@10%": 0.0007827788649706457,
"calibration/coverage@15%": 0.0469185726516634,
"calibration/coverage@20%": 0.11843046722113501,
"calibration/coverage@25%": 0.18492768468688844,
"calibration/coverage@30%": 0.2803074547455969,
"calibration/coverage@5%": 0.0007827788649706457,
"calibration/ece": 0.13390239180033145,
"calibration/mean_confidence": 0.4874716846651296,
"calibration/prompt_uniqueness": 0.7678843550825962,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 733.2,
"completions/max_terminated_length": 546.2,
"completions/mean_length": 233.93681640625,
"completions/mean_terminated_length": 233.81001892089844,
"completions/min_length": 109.6,
"completions/min_terminated_length": 109.6,
"epoch": 0.88,
"grad_norm": 0.0028608383145183325,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 940281399.0,
"reward": 0.7891320705413818,
"reward_std": 0.07504131197929383,
"rewards/accuracy_reward": 0.5111328125,
"rewards/brier_reward": 0.7911717891693115,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0033999960869550705,
"rewards/frontier_coverage_1": 0.1350691318511963,
"rewards/frontier_coverage_10": 0.1350691318511963,
"rewards/frontier_coverage_15": 0.13321488201618195,
"rewards/frontier_coverage_20": 0.08778582438826561,
"rewards/frontier_coverage_25": 0.05375445336103439,
"rewards/frontier_coverage_5": 0.1350691318511963,
"rewards/frontier_ece_reward": 0.005027260864153504,
"rewards/frontier_entropy_batch_reward": -0.06602781862020493,
"signal/accuracy_reward/centered_abs_mean": 0.0984375,
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
"signal/accuracy_reward/group_std_mean": 0.13037826269865035,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04921875,
"signal/advantage_abs_mean": 0.058286719024181366,
"signal/advantage_pre_scale_abs_mean": 0.058286719024181366,
"signal/advantage_pre_scale_std": 0.09950221627950669,
"signal/advantage_std": 0.09950221627950669,
"signal/brier_reward/centered_abs_mean": 0.131209397315979,
"signal/brier_reward/group_bin_occupancy": 0.826171875,
"signal/brier_reward/group_std_mean": 0.16899926364421844,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005097524542361498,
"signal/brier_reward/weight": 0.03884957581758499,
"signal/brier_reward/weighted_centered_abs_mean": 0.005097524542361498,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033862961456179617,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.669140625,
"signal/frontier_aurc_reward/group_std_mean": 0.005641693249344826,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.419235189445316e-05,
"signal/frontier_aurc_reward/weight": 0.02191058695316315,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.419235189445316e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18316951990127564,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_1/group_std_mean": 0.23260467648506164,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003576703788712621,
"signal/frontier_coverage_1/weight": 0.019526761770248414,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003576703788712621,
"signal/frontier_coverage_10/centered_abs_mean": 0.18316951990127564,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_10/group_std_mean": 0.23260467648506164,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003576703788712621,
"signal/frontier_coverage_10/weight": 0.019526761770248414,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003576703788712621,
"signal/frontier_coverage_15/centered_abs_mean": 0.18001371920108794,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_15/group_std_mean": 0.22868903875350952,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035156650468707086,
"signal/frontier_coverage_15/weight": 0.019530036300420762,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035156650468707086,
"signal/frontier_coverage_20/centered_abs_mean": 0.11850059181451797,
"signal/frontier_coverage_20/group_bin_occupancy": 0.861328125,
"signal/frontier_coverage_20/group_std_mean": 0.15187331438064575,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00235068048350513,
"signal/frontier_coverage_20/weight": 0.019837450236082077,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00235068048350513,
"signal/frontier_coverage_25/centered_abs_mean": 0.06647183299064637,
"signal/frontier_coverage_25/group_bin_occupancy": 0.89921875,
"signal/frontier_coverage_25/group_std_mean": 0.08510075211524963,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013495617080479861,
"signal/frontier_coverage_25/weight": 0.02030315063893795,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013495617080479861,
"signal/frontier_coverage_5/centered_abs_mean": 0.18316951990127564,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_5/group_std_mean": 0.23260467648506164,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003576703788712621,
"signal/frontier_coverage_5/weight": 0.019526761770248414,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003576703788712621,
"signal/frontier_ece_reward/centered_abs_mean": 0.006699068006128073,
"signal/frontier_ece_reward/group_bin_occupancy": 0.692578125,
"signal/frontier_ece_reward/group_std_mean": 0.008414249680936337,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010157921817153693,
"signal/frontier_ece_reward/weight": 0.1516319841146469,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010157921817153693,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08991494029760361,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.547265625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.11144240945577621,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015255115181207656,
"signal/frontier_entropy_batch_reward/weight": 0.1696569263935089,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015255115181207656,
"step": 275
},
{
"adaptive_ema/accuracy_reward": 0.5073937566060608,
"adaptive_ema/brier_reward": 0.7527572799637534,
"adaptive_ema/format_reward": 0.9770818442948807,
"adaptive_ema/frontier_aurc_reward": 0.019175632617939066,
"adaptive_ema/frontier_coverage_1": 0.12594929285212791,
"adaptive_ema/frontier_coverage_10": 0.12594929285212791,
"adaptive_ema/frontier_coverage_15": 0.1257502319704197,
"adaptive_ema/frontier_coverage_20": 0.11108417689443409,
"adaptive_ema/frontier_coverage_25": 0.09012292847501352,
"adaptive_ema/frontier_coverage_5": 0.12594929285212791,
"adaptive_ema/frontier_ece_reward": 0.027905660536216792,
"adaptive_ema/frontier_entropy_batch_reward": -0.08442867341245203,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.038624754548072814,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021942029893398284,
"adaptive_weight/frontier_coverage_1": 0.019553394988179207,
"adaptive_weight/frontier_coverage_10": 0.019553394988179207,
"adaptive_weight/frontier_coverage_15": 0.019557848572731018,
"adaptive_weight/frontier_coverage_20": 0.019885944575071333,
"adaptive_weight/frontier_coverage_25": 0.020354868844151497,
"adaptive_weight/frontier_coverage_5": 0.019553394988179207,
"adaptive_weight/frontier_ece_reward": 0.15186265110969543,
"adaptive_weight/frontier_entropy_batch_reward": 0.16941171288490295,
"calibration/aurc": 0.3689805672848886,
"calibration/batch_distribution_entropy": 0.9837880140435489,
"calibration/batch_entropy_100bins": 0.8313778848884892,
"calibration/batch_entropy_10bins": 0.9837880140435489,
"calibration/batch_entropy_50bins": 0.8950411901827275,
"calibration/batch_uniqueness": 0.9330657958984375,
"calibration/buffer_distribution_entropy": 0.9904783408780954,
"calibration/buffer_entropy_100bins": 0.7508931921521784,
"calibration/buffer_entropy_10bins": 0.9904783408780954,
"calibration/buffer_entropy_50bins": 0.840149593740642,
"calibration/confidence_entropy": 0.4785073333743538,
"calibration/coverage@0%": 0.011328125,
"calibration/coverage@1%": 0.011328125,
"calibration/coverage@10%": 0.047265625,
"calibration/coverage@15%": 0.0625,
"calibration/coverage@20%": 0.193359375,
"calibration/coverage@25%": 0.3390625,
"calibration/coverage@30%": 0.425390625,
"calibration/coverage@5%": 0.029296875,
"calibration/ece": 0.13935147315085533,
"calibration/mean_confidence": 0.509082942248854,
"calibration/prompt_uniqueness": 0.789599609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 955.0,
"completions/max_terminated_length": 543.6,
"completions/mean_length": 236.7720703125,
"completions/mean_terminated_length": 236.51817321777344,
"completions/min_length": 106.2,
"completions/min_terminated_length": 106.2,
"epoch": 0.896,
"grad_norm": 0.0009539374732412398,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 957816793.0,
"reward": 0.8073800921440124,
"reward_std": 0.06512940153479577,
"rewards/accuracy_reward": 0.54296875,
"rewards/brier_reward": 0.7862501502037048,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002993757161311805,
"rewards/frontier_coverage_1": 0.10751031935214997,
"rewards/frontier_coverage_10": 0.10751031935214997,
"rewards/frontier_coverage_15": 0.1055484026670456,
"rewards/frontier_coverage_20": 0.07425087094306945,
"rewards/frontier_coverage_25": 0.05060453489422798,
"rewards/frontier_coverage_5": 0.10751031935214997,
"rewards/frontier_ece_reward": 0.003828176483511925,
"rewards/frontier_entropy_batch_reward": -0.03423706814646721,
"signal/accuracy_reward/centered_abs_mean": 0.07969970703125,
"signal/accuracy_reward/group_bin_occupancy": 0.16953125,
"signal/accuracy_reward/group_std_mean": 0.11474124789237976,
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039849853515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039849853515625,
"signal/advantage_abs_mean": 0.04677758142352104,
"signal/advantage_pre_scale_abs_mean": 0.04677758142352104,
"signal/advantage_pre_scale_std": 0.08716107457876206,
"signal/advantage_std": 0.08716107457876206,
"signal/brier_reward/centered_abs_mean": 0.12659992277622223,
"signal/brier_reward/group_bin_occupancy": 0.834765625,
"signal/brier_reward/group_std_mean": 0.16378540694713592,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0048905015923082825,
"signal/brier_reward/weight": 0.038624754548072814,
"signal/brier_reward/weighted_centered_abs_mean": 0.0048905015923082825,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025218484457582234,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.669921875,
"signal/frontier_aurc_reward/group_std_mean": 0.003992916271090508,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.532704090001061e-05,
"signal/frontier_aurc_reward/weight": 0.021942029893398284,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.532704090001061e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17465949654579163,
"signal/frontier_coverage_1/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_1/group_std_mean": 0.2252320319414139,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034151420928537845,
"signal/frontier_coverage_1/weight": 0.019553394988179207,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034151420928537845,
"signal/frontier_coverage_10/centered_abs_mean": 0.17465949654579163,
"signal/frontier_coverage_10/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_10/group_std_mean": 0.2252320319414139,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034151420928537845,
"signal/frontier_coverage_10/weight": 0.019553394988179207,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034151420928537845,
"signal/frontier_coverage_15/centered_abs_mean": 0.1714522898197174,
"signal/frontier_coverage_15/group_bin_occupancy": 0.859765625,
"signal/frontier_coverage_15/group_std_mean": 0.22118231952190398,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033531927037984133,
"signal/frontier_coverage_15/weight": 0.019557848572731018,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033531927037984133,
"signal/frontier_coverage_20/centered_abs_mean": 0.11263496875762939,
"signal/frontier_coverage_20/group_bin_occupancy": 0.860546875,
"signal/frontier_coverage_20/group_std_mean": 0.1457061290740967,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00223980862647295,
"signal/frontier_coverage_20/weight": 0.019885944575071333,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00223980862647295,
"signal/frontier_coverage_25/centered_abs_mean": 0.06343068853020668,
"signal/frontier_coverage_25/group_bin_occupancy": 0.894140625,
"signal/frontier_coverage_25/group_std_mean": 0.08181680142879486,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012910918798297645,
"signal/frontier_coverage_25/weight": 0.020354868844151497,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012910918798297645,
"signal/frontier_coverage_5/centered_abs_mean": 0.17465949654579163,
"signal/frontier_coverage_5/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_5/group_std_mean": 0.2252320319414139,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034151420928537845,
"signal/frontier_coverage_5/weight": 0.019553394988179207,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034151420928537845,
"signal/frontier_ece_reward/centered_abs_mean": 0.006351580470800399,
"signal/frontier_ece_reward/group_bin_occupancy": 0.691796875,
"signal/frontier_ece_reward/group_std_mean": 0.008096476551145315,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009645341080613435,
"signal/frontier_ece_reward/weight": 0.15186265110969543,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009645341080613435,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.062499994039535524,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.584375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.07872299402952194,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01058815661817789,
"signal/frontier_entropy_batch_reward/weight": 0.16941171288490295,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01058815661817789,
"step": 280
},
{
"adaptive_ema/accuracy_reward": 0.5092284077458263,
"adaptive_ema/brier_reward": 0.7546247262433801,
"adaptive_ema/format_reward": 0.978191525834433,
"adaptive_ema/frontier_aurc_reward": 0.018114457775894748,
"adaptive_ema/frontier_coverage_1": 0.12504303037808023,
"adaptive_ema/frontier_coverage_10": 0.12504303037808023,
"adaptive_ema/frontier_coverage_15": 0.12474519740835857,
"adaptive_ema/frontier_coverage_20": 0.10942419283222868,
"adaptive_ema/frontier_coverage_25": 0.08823268569686613,
"adaptive_ema/frontier_coverage_5": 0.12504303037808023,
"adaptive_ema/frontier_ece_reward": 0.02673011646802669,
"adaptive_ema/frontier_entropy_batch_reward": -0.08223391461640496,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.038353316485881805,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021977385878562926,
"adaptive_weight/frontier_coverage_1": 0.019584020227193834,
"adaptive_weight/frontier_coverage_10": 0.019584020227193834,
"adaptive_weight/frontier_coverage_15": 0.01959068663418293,
"adaptive_weight/frontier_coverage_20": 0.019933614134788512,
"adaptive_weight/frontier_coverage_25": 0.020407940819859506,
"adaptive_weight/frontier_coverage_5": 0.019584020227193834,
"adaptive_weight/frontier_ece_reward": 0.15212669968605042,
"adaptive_weight/frontier_entropy_batch_reward": 0.16915828883647918,
"calibration/aurc": 0.3819044789842734,
"calibration/batch_distribution_entropy": 0.9745500386866468,
"calibration/batch_entropy_100bins": 0.8460082275430061,
"calibration/batch_entropy_10bins": 0.9745500386866468,
"calibration/batch_entropy_50bins": 0.9023396987442025,
"calibration/batch_uniqueness": 0.9325820998525798,
"calibration/buffer_distribution_entropy": 0.9908606191108204,
"calibration/buffer_entropy_100bins": 0.7591955423673246,
"calibration/buffer_entropy_10bins": 0.9908606191108204,
"calibration/buffer_entropy_50bins": 0.8460835465188536,
"calibration/confidence_entropy": 0.4778993098135073,
"calibration/coverage@0%": 0.016408543297455967,
"calibration/coverage@1%": 0.016408543297455967,
"calibration/coverage@10%": 0.057814793297455966,
"calibration/coverage@15%": 0.09690710616438356,
"calibration/coverage@20%": 0.14847572162426614,
"calibration/coverage@25%": 0.27474162181996087,
"calibration/coverage@30%": 0.3970485261741683,
"calibration/coverage@5%": 0.05117416829745597,
"calibration/ece": 0.12728103744166014,
"calibration/mean_confidence": 0.4577738717529273,
"calibration/prompt_uniqueness": 0.79909076035705,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 796.8,
"completions/max_terminated_length": 651.6,
"completions/mean_length": 241.9130859375,
"completions/mean_terminated_length": 241.28506469726562,
"completions/min_length": 109.6,
"completions/min_terminated_length": 109.6,
"epoch": 0.912,
"grad_norm": 0.000874386983923614,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 975345279.0,
"reward": 0.8047022581100464,
"reward_std": 0.06991915851831436,
"rewards/accuracy_reward": 0.54228515625,
"rewards/brier_reward": 0.786166763305664,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.002516270847991109,
"rewards/frontier_coverage_1": 0.10352101437747478,
"rewards/frontier_coverage_10": 0.10352101437747478,
"rewards/frontier_coverage_15": 0.10200221072882414,
"rewards/frontier_coverage_20": 0.07823080904781818,
"rewards/frontier_coverage_25": 0.051585903763771056,
"rewards/frontier_coverage_5": 0.10352101437747478,
"rewards/frontier_ece_reward": 0.0038088133092969655,
"rewards/frontier_entropy_batch_reward": -0.04462176710367203,
"signal/accuracy_reward/centered_abs_mean": 0.087933349609375,
"signal/accuracy_reward/group_bin_occupancy": 0.169921875,
"signal/accuracy_reward/group_std_mean": 0.1205834612250328,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0439666748046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0439666748046875,
"signal/advantage_abs_mean": 0.052356043457984926,
"signal/advantage_pre_scale_abs_mean": 0.052356043457984926,
"signal/advantage_pre_scale_std": 0.09320106953382493,
"signal/advantage_std": 0.09320106953382493,
"signal/brier_reward/centered_abs_mean": 0.1347974494099617,
"signal/brier_reward/group_bin_occupancy": 0.841015625,
"signal/brier_reward/group_std_mean": 0.17360511124134065,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005169952008873224,
"signal/brier_reward/weight": 0.038353316485881805,
"signal/brier_reward/weighted_centered_abs_mean": 0.005169952008873224,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0033145630266517402,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023487197468057275,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.68046875,
"signal/frontier_aurc_reward/group_std_mean": 0.004058520402759313,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1618300494737925e-05,
"signal/frontier_aurc_reward/weight": 0.021977385878562926,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1618300494737925e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18932445645332335,
"signal/frontier_coverage_1/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_1/group_std_mean": 0.24096913039684295,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037079003173857926,
"signal/frontier_coverage_1/weight": 0.019584020227193834,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037079003173857926,
"signal/frontier_coverage_10/centered_abs_mean": 0.18932445645332335,
"signal/frontier_coverage_10/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_10/group_std_mean": 0.24096913039684295,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037079003173857926,
"signal/frontier_coverage_10/weight": 0.019584020227193834,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037079003173857926,
"signal/frontier_coverage_15/centered_abs_mean": 0.18330602943897248,
"signal/frontier_coverage_15/group_bin_occupancy": 0.863671875,
"signal/frontier_coverage_15/group_std_mean": 0.2332424372434616,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003591257706284523,
"signal/frontier_coverage_15/weight": 0.01959068663418293,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003591257706284523,
"signal/frontier_coverage_20/centered_abs_mean": 0.12120090126991272,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_20/group_std_mean": 0.15430061519145966,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002416037442162633,
"signal/frontier_coverage_20/weight": 0.019933614134788512,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002416037442162633,
"signal/frontier_coverage_25/centered_abs_mean": 0.07087931782007217,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9,
"signal/frontier_coverage_25/group_std_mean": 0.09026498645544052,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014465219341218472,
"signal/frontier_coverage_25/weight": 0.020407940819859506,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014465219341218472,
"signal/frontier_coverage_5/centered_abs_mean": 0.18932445645332335,
"signal/frontier_coverage_5/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_5/group_std_mean": 0.24096913039684295,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037079003173857926,
"signal/frontier_coverage_5/weight": 0.019584020227193834,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037079003173857926,
"signal/frontier_ece_reward/centered_abs_mean": 0.006613946333527565,
"signal/frontier_ece_reward/group_bin_occupancy": 0.712109375,
"signal/frontier_ece_reward/group_std_mean": 0.008411933667957783,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010061656357720494,
"signal/frontier_ece_reward/weight": 0.15212669968605042,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010061656357720494,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07715009674429893,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.570703125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09899689108133317,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013051050342619419,
"signal/frontier_entropy_batch_reward/weight": 0.16915828883647918,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013051050342619419,
"step": 285
},
{
"adaptive_ema/accuracy_reward": 0.5106151035471637,
"adaptive_ema/brier_reward": 0.75579317795121,
"adaptive_ema/format_reward": 0.9792288181548298,
"adaptive_ema/frontier_aurc_reward": 0.0170897314251875,
"adaptive_ema/frontier_coverage_1": 0.12426640506099074,
"adaptive_ema/frontier_coverage_10": 0.12426640506099074,
"adaptive_ema/frontier_coverage_15": 0.12390274216129707,
"adaptive_ema/frontier_coverage_20": 0.1082497737327092,
"adaptive_ema/frontier_coverage_25": 0.0865964311924109,
"adaptive_ema/frontier_coverage_5": 0.12426640506099074,
"adaptive_ema/frontier_ece_reward": 0.025613024390907325,
"adaptive_ema/frontier_entropy_batch_reward": -0.08251692020997156,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.038155969232320786,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.021991834789514542,
"adaptive_weight/frontier_coverage_1": 0.019593842327594757,
"adaptive_weight/frontier_coverage_10": 0.019593842327594757,
"adaptive_weight/frontier_coverage_15": 0.01960197910666466,
"adaptive_weight/frontier_coverage_20": 0.01995220109820366,
"adaptive_weight/frontier_coverage_25": 0.02043667696416378,
"adaptive_weight/frontier_coverage_5": 0.019593842327594757,
"adaptive_weight/frontier_ece_reward": 0.1522425502538681,
"adaptive_weight/frontier_entropy_batch_reward": 0.16913725733757018,
"calibration/aurc": 0.4009781099250545,
"calibration/batch_distribution_entropy": 0.977108755039781,
"calibration/batch_entropy_100bins": 0.8685464926262375,
"calibration/batch_entropy_10bins": 0.977108755039781,
"calibration/batch_entropy_50bins": 0.9190047463555822,
"calibration/batch_uniqueness": 0.9368927001953125,
"calibration/buffer_distribution_entropy": 0.9910334895593215,
"calibration/buffer_entropy_100bins": 0.7698517366213622,
"calibration/buffer_entropy_10bins": 0.9910334895593215,
"calibration/buffer_entropy_50bins": 0.8536433225242437,
"calibration/confidence_entropy": 0.4729662905175904,
"calibration/coverage@0%": 0.003515625,
"calibration/coverage@1%": 0.003515625,
"calibration/coverage@10%": 0.003515625,
"calibration/coverage@15%": 0.003515625,
"calibration/coverage@20%": 0.0203125,
"calibration/coverage@25%": 0.168359375,
"calibration/coverage@30%": 0.30625,
"calibration/coverage@5%": 0.003515625,
"calibration/ece": 0.12290451159765922,
"calibration/mean_confidence": 0.46484572290423004,
"calibration/prompt_uniqueness": 0.7974609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1149.0,
"completions/max_terminated_length": 593.4,
"completions/mean_length": 237.97587890625,
"completions/mean_terminated_length": 237.46857604980468,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.928,
"grad_norm": 0.0007995866471901536,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 992808968.0,
"reward": 0.7858694791793823,
"reward_std": 0.0720029890537262,
"rewards/accuracy_reward": 0.5267578125,
"rewards/brier_reward": 0.7718318223953247,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0032001886516809463,
"rewards/frontier_coverage_1": 0.11377599537372589,
"rewards/frontier_coverage_10": 0.11377599537372589,
"rewards/frontier_coverage_15": 0.11207389831542969,
"rewards/frontier_coverage_20": 0.08739523887634278,
"rewards/frontier_coverage_25": 0.05544539391994476,
"rewards/frontier_coverage_5": 0.11377599537372589,
"rewards/frontier_ece_reward": 0.004172366205602884,
"rewards/frontier_entropy_batch_reward": -0.11274452954530716,
"signal/accuracy_reward/centered_abs_mean": 0.083251953125,
"signal/accuracy_reward/group_bin_occupancy": 0.16796875,
"signal/accuracy_reward/group_std_mean": 0.11550529301166534,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0416259765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0416259765625,
"signal/advantage_abs_mean": 0.053884600102901456,
"signal/advantage_pre_scale_abs_mean": 0.053884600102901456,
"signal/advantage_pre_scale_std": 0.09473606795072556,
"signal/advantage_std": 0.09473606795072556,
"signal/brier_reward/centered_abs_mean": 0.1376921683549881,
"signal/brier_reward/group_bin_occupancy": 0.82109375,
"signal/brier_reward/group_std_mean": 0.17607857882976533,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005253981985151768,
"signal/brier_reward/weight": 0.038155969232320786,
"signal/brier_reward/weighted_centered_abs_mean": 0.005253981985151768,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003057646518573165,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.66875,
"signal/frontier_aurc_reward/group_std_mean": 0.005382006615400314,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.72438254696317e-05,
"signal/frontier_aurc_reward/weight": 0.021991834789514542,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.72438254696317e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18453809320926667,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8546875,
"signal/frontier_coverage_1/group_std_mean": 0.23699354827404023,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003615809418261051,
"signal/frontier_coverage_1/weight": 0.019593842327594757,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003615809418261051,
"signal/frontier_coverage_10/centered_abs_mean": 0.18453809320926667,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8546875,
"signal/frontier_coverage_10/group_std_mean": 0.23699354827404023,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003615809418261051,
"signal/frontier_coverage_10/weight": 0.019593842327594757,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003615809418261051,
"signal/frontier_coverage_15/centered_abs_mean": 0.18022237420082093,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8546875,
"signal/frontier_coverage_15/group_std_mean": 0.23157143592834473,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035327094607055186,
"signal/frontier_coverage_15/weight": 0.01960197910666466,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035327094607055186,
"signal/frontier_coverage_20/centered_abs_mean": 0.1192478209733963,
"signal/frontier_coverage_20/group_bin_occupancy": 0.860546875,
"signal/frontier_coverage_20/group_std_mean": 0.15341487228870393,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023792308289557695,
"signal/frontier_coverage_20/weight": 0.01995220109820366,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023792308289557695,
"signal/frontier_coverage_25/centered_abs_mean": 0.07185964584350586,
"signal/frontier_coverage_25/group_bin_occupancy": 0.89140625,
"signal/frontier_coverage_25/group_std_mean": 0.09106694906949997,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014685554197058082,
"signal/frontier_coverage_25/weight": 0.02043667696416378,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014685554197058082,
"signal/frontier_coverage_5/centered_abs_mean": 0.18453809320926667,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8546875,
"signal/frontier_coverage_5/group_std_mean": 0.23699354827404023,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003615809418261051,
"signal/frontier_coverage_5/weight": 0.019593842327594757,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003615809418261051,
"signal/frontier_ece_reward/centered_abs_mean": 0.00682686697691679,
"signal/frontier_ece_reward/group_bin_occupancy": 0.690625,
"signal/frontier_ece_reward/group_std_mean": 0.008775676786899566,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010393401607871056,
"signal/frontier_ece_reward/weight": 0.1522425502538681,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010393401607871056,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1246018260717392,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.496875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.15648339837789535,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02107503414154053,
"signal/frontier_entropy_batch_reward/weight": 0.16913725733757018,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02107503414154053,
"step": 290
},
{
"adaptive_ema/accuracy_reward": 0.5108080126735147,
"adaptive_ema/brier_reward": 0.7569582347148616,
"adaptive_ema/format_reward": 0.9802267878664974,
"adaptive_ema/frontier_aurc_reward": 0.016089259993850302,
"adaptive_ema/frontier_coverage_1": 0.12413362836872002,
"adaptive_ema/frontier_coverage_10": 0.12413362836872002,
"adaptive_ema/frontier_coverage_15": 0.12361727269615874,
"adaptive_ema/frontier_coverage_20": 0.10712179072242926,
"adaptive_ema/frontier_coverage_25": 0.08498310727964524,
"adaptive_ema/frontier_coverage_5": 0.12413362836872002,
"adaptive_ema/frontier_ece_reward": 0.024585941797864795,
"adaptive_ema/frontier_entropy_batch_reward": -0.08299495039063112,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.03796238005161286,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.022007527574896813,
"adaptive_weight/frontier_coverage_1": 0.01959085576236248,
"adaptive_weight/frontier_coverage_10": 0.01959085576236248,
"adaptive_weight/frontier_coverage_15": 0.0196024052798748,
"adaptive_weight/frontier_coverage_20": 0.019971366599202157,
"adaptive_weight/frontier_coverage_25": 0.020466551557183264,
"adaptive_weight/frontier_coverage_5": 0.01959085576236248,
"adaptive_weight/frontier_ece_reward": 0.15235669910907745,
"adaptive_weight/frontier_entropy_batch_reward": 0.16916049420833587,
"calibration/aurc": 0.27811625685180363,
"calibration/batch_distribution_entropy": 0.9834259313774243,
"calibration/batch_entropy_100bins": 0.8790850388790513,
"calibration/batch_entropy_10bins": 0.9834259313774243,
"calibration/batch_entropy_50bins": 0.9269705754561457,
"calibration/batch_uniqueness": 0.9436670570885285,
"calibration/buffer_distribution_entropy": 0.99125029700843,
"calibration/buffer_entropy_100bins": 0.7810425527134057,
"calibration/buffer_entropy_10bins": 0.99125029700843,
"calibration/buffer_entropy_50bins": 0.8617512833270945,
"calibration/confidence_entropy": 0.4925551201594316,
"calibration/coverage@0%": 0.017206610812133073,
"calibration/coverage@1%": 0.017206610812133073,
"calibration/coverage@10%": 0.09738716976516634,
"calibration/coverage@15%": 0.2462993823385519,
"calibration/coverage@20%": 0.3373822773972603,
"calibration/coverage@25%": 0.45305620107632094,
"calibration/coverage@30%": 0.5652030332681017,
"calibration/coverage@5%": 0.05552990459882583,
"calibration/ece": 0.10293797015478956,
"calibration/mean_confidence": 0.46638448443790226,
"calibration/prompt_uniqueness": 0.8167916924102497,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 926.4,
"completions/max_terminated_length": 518.8,
"completions/mean_length": 234.8603515625,
"completions/mean_terminated_length": 234.6066162109375,
"completions/min_length": 107.8,
"completions/min_terminated_length": 107.8,
"epoch": 0.944,
"grad_norm": 0.0025131264701485634,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 1010189362.0,
"reward": 0.7966355443000793,
"reward_std": 0.08069588989019394,
"rewards/accuracy_reward": 0.5318359375,
"rewards/brier_reward": 0.7839723229408264,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0030146833509206773,
"rewards/frontier_coverage_1": 0.11095014810562134,
"rewards/frontier_coverage_10": 0.11095014810562134,
"rewards/frontier_coverage_15": 0.10751423984766006,
"rewards/frontier_coverage_20": 0.0784274235367775,
"rewards/frontier_coverage_25": 0.05127616748213768,
"rewards/frontier_coverage_5": 0.11095014810562134,
"rewards/frontier_ece_reward": 0.004399744141846895,
"rewards/frontier_entropy_batch_reward": -0.06369005087763072,
"signal/accuracy_reward/centered_abs_mean": 0.113671875,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.14452168345451355,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0568359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0568359375,
"signal/advantage_abs_mean": 0.06398204565048218,
"signal/advantage_pre_scale_abs_mean": 0.06398204565048218,
"signal/advantage_pre_scale_std": 0.10589756518602371,
"signal/advantage_std": 0.10589756518602371,
"signal/brier_reward/centered_abs_mean": 0.13136824518442153,
"signal/brier_reward/group_bin_occupancy": 0.848046875,
"signal/brier_reward/group_std_mean": 0.16815738677978515,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004987320583313704,
"signal/brier_reward/weight": 0.03796238005161286,
"signal/brier_reward/weighted_centered_abs_mean": 0.004987320583313704,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027758491691201926,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.68828125,
"signal/frontier_aurc_reward/group_std_mean": 0.00486855860799551,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.108681991463527e-05,
"signal/frontier_aurc_reward/weight": 0.022007527574896813,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.108681991463527e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19229323863983155,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_1/group_std_mean": 0.2426188260316849,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037671592552214863,
"signal/frontier_coverage_1/weight": 0.01959085576236248,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037671592552214863,
"signal/frontier_coverage_10/centered_abs_mean": 0.19229323863983155,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_10/group_std_mean": 0.2426188260316849,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037671592552214863,
"signal/frontier_coverage_10/weight": 0.01959085576236248,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037671592552214863,
"signal/frontier_coverage_15/centered_abs_mean": 0.18586346805095671,
"signal/frontier_coverage_15/group_bin_occupancy": 0.865234375,
"signal/frontier_coverage_15/group_std_mean": 0.23480392396450042,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003643349697813392,
"signal/frontier_coverage_15/weight": 0.0196024052798748,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003643349697813392,
"signal/frontier_coverage_20/centered_abs_mean": 0.12204153388738632,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_20/group_std_mean": 0.15582017004489898,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024373249616473913,
"signal/frontier_coverage_20/weight": 0.019971366599202157,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024373249616473913,
"signal/frontier_coverage_25/centered_abs_mean": 0.06823899745941162,
"signal/frontier_coverage_25/group_bin_occupancy": 0.916796875,
"signal/frontier_coverage_25/group_std_mean": 0.08735780119895935,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013965919613838195,
"signal/frontier_coverage_25/weight": 0.020466551557183264,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013965919613838195,
"signal/frontier_coverage_5/centered_abs_mean": 0.19229323863983155,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_5/group_std_mean": 0.2426188260316849,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037671592552214863,
"signal/frontier_coverage_5/weight": 0.01959085576236248,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037671592552214863,
"signal/frontier_ece_reward/centered_abs_mean": 0.007136920373886824,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7046875,
"signal/frontier_ece_reward/group_std_mean": 0.009026623517274856,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010873484192416073,
"signal/frontier_ece_reward/weight": 0.15235669910907745,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010873484192416073,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09417234137654304,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.52890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.12170673459768296,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015933521930128335,
"signal/frontier_entropy_batch_reward/weight": 0.16916049420833587,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015933521930128335,
"step": 295
},
{
"adaptive_ema/accuracy_reward": 0.5122639748209438,
"adaptive_ema/brier_reward": 0.7585752915825011,
"adaptive_ema/format_reward": 0.9811795896968777,
"adaptive_ema/frontier_aurc_reward": 0.015144469697940383,
"adaptive_ema/frontier_coverage_1": 0.12333728149075274,
"adaptive_ema/frontier_coverage_10": 0.12333728149075274,
"adaptive_ema/frontier_coverage_15": 0.12278979277989002,
"adaptive_ema/frontier_coverage_20": 0.10565099024087207,
"adaptive_ema/frontier_coverage_25": 0.08325280611908174,
"adaptive_ema/frontier_coverage_5": 0.12333728149075274,
"adaptive_ema/frontier_ece_reward": 0.023597073161841908,
"adaptive_ema/frontier_entropy_batch_reward": -0.08117773493254943,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.037726181000471114,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.022038231790065765,
"adaptive_weight/frontier_coverage_1": 0.019617187976837158,
"adaptive_weight/frontier_coverage_10": 0.019617187976837158,
"adaptive_weight/frontier_coverage_15": 0.01962943859398365,
"adaptive_weight/frontier_coverage_20": 0.020012956112623215,
"adaptive_weight/frontier_coverage_25": 0.02051416300237179,
"adaptive_weight/frontier_coverage_5": 0.019617187976837158,
"adaptive_weight/frontier_ece_reward": 0.1525774270296097,
"adaptive_weight/frontier_entropy_batch_reward": 0.16895003616809845,
"calibration/aurc": 0.3313838553836209,
"calibration/batch_distribution_entropy": 0.9878222102541848,
"calibration/batch_entropy_100bins": 0.8772238237138197,
"calibration/batch_entropy_10bins": 0.9878222102541848,
"calibration/batch_entropy_50bins": 0.9273256058028748,
"calibration/batch_uniqueness": 0.9473175048828125,
"calibration/buffer_distribution_entropy": 0.9916506011747319,
"calibration/buffer_entropy_100bins": 0.7920718405068625,
"calibration/buffer_entropy_10bins": 0.9916506011747319,
"calibration/buffer_entropy_50bins": 0.8694198189536589,
"calibration/confidence_entropy": 0.48771411926965297,
"calibration/coverage@0%": 0.004296875,
"calibration/coverage@1%": 0.004296875,
"calibration/coverage@10%": 0.141015625,
"calibration/coverage@15%": 0.2109375,
"calibration/coverage@20%": 0.27578125,
"calibration/coverage@25%": 0.31171875,
"calibration/coverage@30%": 0.366796875,
"calibration/coverage@5%": 0.0875,
"calibration/ece": 0.143712651508147,
"calibration/mean_confidence": 0.5249439933399102,
"calibration/prompt_uniqueness": 0.8244140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 957.2,
"completions/max_terminated_length": 626.2,
"completions/mean_length": 231.46904296875,
"completions/mean_terminated_length": 231.2143524169922,
"completions/min_length": 107.4,
"completions/min_terminated_length": 107.4,
"epoch": 0.96,
"grad_norm": 0.0007890698034316301,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 1027499925.0,
"reward": 0.7937455177307129,
"reward_std": 0.06518236324191093,
"rewards/accuracy_reward": 0.523046875,
"rewards/brier_reward": 0.7962269902229309,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003563016327098012,
"rewards/frontier_coverage_1": 0.12367903590202331,
"rewards/frontier_coverage_10": 0.12367903590202331,
"rewards/frontier_coverage_15": 0.12319278419017791,
"rewards/frontier_coverage_20": 0.08755376040935517,
"rewards/frontier_coverage_25": 0.05181429237127304,
"rewards/frontier_coverage_5": 0.12367903590202331,
"rewards/frontier_ece_reward": 0.004861411638557911,
"rewards/frontier_entropy_batch_reward": -0.06445286944508552,
"signal/accuracy_reward/centered_abs_mean": 0.07904052734375,
"signal/accuracy_reward/group_bin_occupancy": 0.164453125,
"signal/accuracy_reward/group_std_mean": 0.10704978257417679,
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039520263671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039520263671875,
"signal/advantage_abs_mean": 0.04916732534766197,
"signal/advantage_pre_scale_abs_mean": 0.04916732534766197,
"signal/advantage_pre_scale_std": 0.08988670557737351,
"signal/advantage_std": 0.08988670557737351,
"signal/brier_reward/centered_abs_mean": 0.11760072112083435,
"signal/brier_reward/group_bin_occupancy": 0.8328125,
"signal/brier_reward/group_std_mean": 0.15275218188762665,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00443740775808692,
"signal/brier_reward/weight": 0.037726181000471114,
"signal/brier_reward/weighted_centered_abs_mean": 0.00443740775808692,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032274942379444836,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7015625,
"signal/frontier_aurc_reward/group_std_mean": 0.005160880694165826,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.112704624887556e-05,
"signal/frontier_aurc_reward/weight": 0.022038231790065765,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.112704624887556e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15767935812473297,
"signal/frontier_coverage_1/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_1/group_std_mean": 0.20363860130310057,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030932387802749873,
"signal/frontier_coverage_1/weight": 0.019617187976837158,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030932387802749873,
"signal/frontier_coverage_10/centered_abs_mean": 0.15767935812473297,
"signal/frontier_coverage_10/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_10/group_std_mean": 0.20363860130310057,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030932387802749873,
"signal/frontier_coverage_10/weight": 0.019617187976837158,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030932387802749873,
"signal/frontier_coverage_15/centered_abs_mean": 0.1573574274778366,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8625,
"signal/frontier_coverage_15/group_std_mean": 0.20323737263679503,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030888537876307964,
"signal/frontier_coverage_15/weight": 0.01962943859398365,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030888537876307964,
"signal/frontier_coverage_20/centered_abs_mean": 0.10249871462583542,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_20/group_std_mean": 0.13297524452209472,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002051279554143548,
"signal/frontier_coverage_20/weight": 0.020012956112623215,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002051279554143548,
"signal/frontier_coverage_25/centered_abs_mean": 0.061453332751989366,
"signal/frontier_coverage_25/group_bin_occupancy": 0.916015625,
"signal/frontier_coverage_25/group_std_mean": 0.07838500589132309,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012606421019881963,
"signal/frontier_coverage_25/weight": 0.02051416300237179,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012606421019881963,
"signal/frontier_coverage_5/centered_abs_mean": 0.15767935812473297,
"signal/frontier_coverage_5/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_5/group_std_mean": 0.20363860130310057,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030932387802749873,
"signal/frontier_coverage_5/weight": 0.019617187976837158,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030932387802749873,
"signal/frontier_ece_reward/centered_abs_mean": 0.006616297829896212,
"signal/frontier_ece_reward/group_bin_occupancy": 0.698046875,
"signal/frontier_ece_reward/group_std_mean": 0.008402452990412711,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010094881290569901,
"signal/frontier_ece_reward/weight": 0.1525774270296097,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010094881290569901,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08187509179115296,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.533984375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10262143462896348,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013833227381110192,
"signal/frontier_entropy_batch_reward/weight": 0.16895003616809845,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013833227381110192,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.428103398377754,
"eval_calibration/batch_distribution_entropy": 0.9376296667868993,
"eval_calibration/batch_entropy_100bins": 0.6705273382953512,
"eval_calibration/batch_entropy_10bins": 0.9376296667868993,
"eval_calibration/batch_entropy_50bins": 0.7588804645140499,
"eval_calibration/batch_uniqueness": 0.8896484375,
"eval_calibration/buffer_distribution_entropy": 0.9919862190492793,
"eval_calibration/buffer_entropy_100bins": 0.7985249951460675,
"eval_calibration/buffer_entropy_10bins": 0.9919862190492793,
"eval_calibration/buffer_entropy_50bins": 0.8737942300811827,
"eval_calibration/confidence_entropy": 0.4579045077229937,
"eval_calibration/coverage@0%": 0.078125,
"eval_calibration/coverage@1%": 0.078125,
"eval_calibration/coverage@10%": 0.078125,
"eval_calibration/coverage@15%": 0.078125,
"eval_calibration/coverage@20%": 0.1015625,
"eval_calibration/coverage@25%": 0.171875,
"eval_calibration/coverage@30%": 0.328125,
"eval_calibration/coverage@5%": 0.078125,
"eval_calibration/ece": 0.21414062500000003,
"eval_calibration/mean_confidence": 0.480234375,
"eval_calibration/prompt_uniqueness": 0.8896484375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 430.5,
"eval_completions/max_terminated_length": 430.5,
"eval_completions/mean_length": 229.84199905395508,
"eval_completions/mean_terminated_length": 229.84199905395508,
"eval_completions/min_length": 131.75,
"eval_completions/min_terminated_length": 131.75,
"eval_loss": 0.0,
"eval_num_tokens": 1027499925.0,
"eval_reward": 0.7297923862934113,
"eval_reward_std": 0.2275286726653576,
"eval_rewards/accuracy_reward": 0.443359375,
"eval_rewards/brier_reward": 0.7976783066987991,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0036502022994682193,
"eval_rewards/frontier_coverage_1": 0.18035605922341347,
"eval_rewards/frontier_coverage_10": 0.18035605922341347,
"eval_rewards/frontier_coverage_15": 0.17741113528609276,
"eval_rewards/frontier_coverage_20": 0.12365293130278587,
"eval_rewards/frontier_coverage_25": 0.0637232419103384,
"eval_rewards/frontier_coverage_5": 0.18035605922341347,
"eval_rewards/frontier_ece_reward": 0.0058051192900165915,
"eval_rewards/frontier_entropy_batch_reward": -0.24005889892578125,
"eval_runtime": 22.5775,
"eval_samples_per_second": 22.146,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4752197265625,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4949190020561218,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23760986328125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23760986328125,
"eval_signal/advantage_abs_mean": 0.20552946254611015,
"eval_signal/advantage_pre_scale_abs_mean": 0.20552946254611015,
"eval_signal/advantage_pre_scale_std": 0.22503003850579262,
"eval_signal/advantage_std": 0.22503003850579262,
"eval_signal/brier_reward/centered_abs_mean": 0.1839733049273491,
"eval_signal/brier_reward/group_bin_occupancy": 0.84375,
"eval_signal/brier_reward/group_std_mean": 0.23671213164925575,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006916819605976343,
"eval_signal/brier_reward/weight": 0.037596866488456726,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.006916819605976343,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004666732216719538,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6484375,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009033310692757368,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010289873716828879,
"eval_signal/frontier_aurc_reward/weight": 0.022049419581890106,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010289873716828879,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3463926389813423,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.42951615899801254,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006794996908865869,
"eval_signal/frontier_coverage_1/weight": 0.01961645856499672,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006794996908865869,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3463926389813423,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.42951615899801254,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006794996908865869,
"eval_signal/frontier_coverage_10/weight": 0.01961645856499672,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006794996908865869,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.34066376090049744,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.42321472615003586,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006686707376502454,
"eval_signal/frontier_coverage_15/weight": 0.01962846703827381,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006686707376502454,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.21344707161188126,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2740728035569191,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004273193306289613,
"eval_signal/frontier_coverage_20/weight": 0.02001992054283619,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004273193306289613,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09510924108326435,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.12545426562428474,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019525631796568632,
"eval_signal/frontier_coverage_25/weight": 0.020529689267277718,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019525631796568632,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3463926389813423,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.42951615899801254,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006794996908865869,
"eval_signal/frontier_coverage_5/weight": 0.01961645856499672,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006794996908865869,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.01034590182825923,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8828125,
"eval_signal/frontier_ece_reward/group_std_mean": 0.01258331467397511,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001579352654516697,
"eval_signal/frontier_ece_reward/weight": 0.15265490114688873,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001579352654516697,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3177356719970703,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.578125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.37068361788988113,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.053688227199018,
"eval_signal/frontier_entropy_batch_reward/weight": 0.16897135972976685,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.053688227199018,
"eval_steps_per_second": 0.177,
"step": 300
},
{
"adaptive_ema/accuracy_reward": 0.5130771176454476,
"adaptive_ema/brier_reward": 0.7603371332806945,
"adaptive_ema/format_reward": 0.9820866134055427,
"adaptive_ema/frontier_aurc_reward": 0.014237143105888164,
"adaptive_ema/frontier_coverage_1": 0.1230539148089607,
"adaptive_ema/frontier_coverage_10": 0.1230539148089607,
"adaptive_ema/frontier_coverage_15": 0.12254646138111085,
"adaptive_ema/frontier_coverage_20": 0.1048816065193943,
"adaptive_ema/frontier_coverage_25": 0.08169918055750292,
"adaptive_ema/frontier_coverage_5": 0.1230539148089607,
"adaptive_ema/frontier_ece_reward": 0.02266221011791273,
"adaptive_ema/frontier_entropy_batch_reward": -0.0802780809085945,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.03746383413672447,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.022066167369484902,
"adaptive_weight/frontier_coverage_1": 0.019630317389965058,
"adaptive_weight/frontier_coverage_10": 0.019630317389965058,
"adaptive_weight/frontier_coverage_15": 0.019641677290201186,
"adaptive_weight/frontier_coverage_20": 0.020037102699279784,
"adaptive_weight/frontier_coverage_25": 0.02055603824555874,
"adaptive_weight/frontier_coverage_5": 0.019630317389965058,
"adaptive_weight/frontier_ece_reward": 0.15277635753154756,
"adaptive_weight/frontier_entropy_batch_reward": 0.16886787116527557,
"calibration/aurc": 0.2831852439692231,
"calibration/batch_distribution_entropy": 0.9723902640811122,
"calibration/batch_entropy_100bins": 0.8672982440029516,
"calibration/batch_entropy_10bins": 0.9723902640811122,
"calibration/batch_entropy_50bins": 0.9170440971790507,
"calibration/batch_uniqueness": 0.9440137046805166,
"calibration/buffer_distribution_entropy": 0.9921364559876962,
"calibration/buffer_entropy_100bins": 0.8027849052984001,
"calibration/buffer_entropy_10bins": 0.9921364559876962,
"calibration/buffer_entropy_50bins": 0.8766328649266398,
"calibration/confidence_entropy": 0.5113814259674367,
"calibration/coverage@0%": 0.01682751225490196,
"calibration/coverage@1%": 0.01682751225490196,
"calibration/coverage@10%": 0.0895435049019608,
"calibration/coverage@15%": 0.2998468137254902,
"calibration/coverage@20%": 0.45614276960784317,
"calibration/coverage@25%": 0.5272732843137254,
"calibration/coverage@30%": 0.5866973039215686,
"calibration/coverage@5%": 0.03871476715686274,
"calibration/ece": 0.15126782397040078,
"calibration/mean_confidence": 0.4896648492617291,
"calibration/prompt_uniqueness": 0.8269227916070498,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1167.0,
"completions/max_terminated_length": 742.2,
"completions/mean_length": 229.1451171875,
"completions/mean_terminated_length": 228.6332275390625,
"completions/min_length": 105.6,
"completions/min_terminated_length": 105.6,
"epoch": 0.976,
"grad_norm": 0.001429693540558219,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 1044707491.0,
"reward": 0.8066218614578247,
"reward_std": 0.07456310316920281,
"rewards/accuracy_reward": 0.55078125,
"rewards/brier_reward": 0.7886098504066468,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0032080563250929117,
"rewards/frontier_coverage_1": 0.09642277602106333,
"rewards/frontier_coverage_10": 0.09642277602106333,
"rewards/frontier_coverage_15": 0.0966072978451848,
"rewards/frontier_coverage_20": 0.07433595582842827,
"rewards/frontier_coverage_25": 0.04629525393247604,
"rewards/frontier_coverage_5": 0.09642277602106333,
"rewards/frontier_ece_reward": 0.003962028119713068,
"rewards/frontier_entropy_batch_reward": -0.051515225879848005,
"signal/accuracy_reward/centered_abs_mean": 0.0962158203125,
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
"signal/accuracy_reward/group_std_mean": 0.12887984663248062,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04810791015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04810791015625,
"signal/advantage_abs_mean": 0.05684085339307785,
"signal/advantage_pre_scale_abs_mean": 0.05684085339307785,
"signal/advantage_pre_scale_std": 0.09820054322481156,
"signal/advantage_std": 0.09820054322481156,
"signal/brier_reward/centered_abs_mean": 0.12333909422159195,
"signal/brier_reward/group_bin_occupancy": 0.84453125,
"signal/brier_reward/group_std_mean": 0.15947476029396057,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004620636254549027,
"signal/brier_reward/weight": 0.03746383413672447,
"signal/brier_reward/weighted_centered_abs_mean": 0.004620636254549027,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029812861699610948,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.68359375,
"signal/frontier_aurc_reward/group_std_mean": 0.0049844134598970415,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.578704924322664e-05,
"signal/frontier_aurc_reward/weight": 0.022066167369484902,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.578704924322664e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1713780403137207,
"signal/frontier_coverage_1/group_bin_occupancy": 0.875390625,
"signal/frontier_coverage_1/group_std_mean": 0.2194644033908844,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003364281542599201,
"signal/frontier_coverage_1/weight": 0.019630317389965058,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003364281542599201,
"signal/frontier_coverage_10/centered_abs_mean": 0.1713780403137207,
"signal/frontier_coverage_10/group_bin_occupancy": 0.875390625,
"signal/frontier_coverage_10/group_std_mean": 0.2194644033908844,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003364281542599201,
"signal/frontier_coverage_10/weight": 0.019630317389965058,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003364281542599201,
"signal/frontier_coverage_15/centered_abs_mean": 0.16927050352096557,
"signal/frontier_coverage_15/group_bin_occupancy": 0.875,
"signal/frontier_coverage_15/group_std_mean": 0.21687354445457457,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033248161897063257,
"signal/frontier_coverage_15/weight": 0.019641677290201186,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033248161897063257,
"signal/frontier_coverage_20/centered_abs_mean": 0.11072558313608169,
"signal/frontier_coverage_20/group_bin_occupancy": 0.859765625,
"signal/frontier_coverage_20/group_std_mean": 0.14288919419050217,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022185867186635734,
"signal/frontier_coverage_20/weight": 0.020037102699279784,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022185867186635734,
"signal/frontier_coverage_25/centered_abs_mean": 0.061330854147672656,
"signal/frontier_coverage_25/group_bin_occupancy": 0.917578125,
"signal/frontier_coverage_25/group_std_mean": 0.0784957006573677,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001260715490207076,
"signal/frontier_coverage_25/weight": 0.02055603824555874,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001260715490207076,
"signal/frontier_coverage_5/centered_abs_mean": 0.1713780403137207,
"signal/frontier_coverage_5/group_bin_occupancy": 0.875390625,
"signal/frontier_coverage_5/group_std_mean": 0.2194644033908844,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003364281542599201,
"signal/frontier_coverage_5/weight": 0.019630317389965058,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003364281542599201,
"signal/frontier_ece_reward/centered_abs_mean": 0.006669245660305023,
"signal/frontier_ece_reward/group_bin_occupancy": 0.703125,
"signal/frontier_ece_reward/group_std_mean": 0.008504109550267458,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010189112043008208,
"signal/frontier_ece_reward/weight": 0.15277635753154756,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010189112043008208,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08715428188443183,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.51796875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.10973945707082748,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014718150720000267,
"signal/frontier_entropy_batch_reward/weight": 0.16886787116527557,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014718150720000267,
"step": 305
},
{
"adaptive_ema/accuracy_reward": 0.5144250010969874,
"adaptive_ema/brier_reward": 0.7617638121707504,
"adaptive_ema/format_reward": 0.9829559898913545,
"adaptive_ema/frontier_aurc_reward": 0.013385174144901188,
"adaptive_ema/frontier_coverage_1": 0.12253397598106337,
"adaptive_ema/frontier_coverage_10": 0.12253397598106337,
"adaptive_ema/frontier_coverage_15": 0.12198978612716108,
"adaptive_ema/frontier_coverage_20": 0.10383560650446637,
"adaptive_ema/frontier_coverage_25": 0.08025628350493652,
"adaptive_ema/frontier_coverage_5": 0.12253397598106337,
"adaptive_ema/frontier_ece_reward": 0.02175329515905596,
"adaptive_ema/frontier_entropy_batch_reward": -0.0801542770711169,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.037239187955856325,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.022084273025393487,
"adaptive_weight/frontier_coverage_1": 0.019641099125146867,
"adaptive_weight/frontier_coverage_10": 0.019641099125146867,
"adaptive_weight/frontier_coverage_15": 0.019653279706835745,
"adaptive_weight/frontier_coverage_20": 0.02005964070558548,
"adaptive_weight/frontier_coverage_25": 0.020587437599897385,
"adaptive_weight/frontier_coverage_5": 0.019641099125146867,
"adaptive_weight/frontier_ece_reward": 0.15291174948215486,
"adaptive_weight/frontier_entropy_batch_reward": 0.16884112656116484,
"calibration/aurc": 0.3806839677424766,
"calibration/batch_distribution_entropy": 0.9715458108502546,
"calibration/batch_entropy_100bins": 0.8537208499198261,
"calibration/batch_entropy_10bins": 0.9715458108502546,
"calibration/batch_entropy_50bins": 0.9048500226531984,
"calibration/batch_uniqueness": 0.9303741455078125,
"calibration/buffer_distribution_entropy": 0.9925188566651506,
"calibration/buffer_entropy_100bins": 0.8121605905038793,
"calibration/buffer_entropy_10bins": 0.9925188566651506,
"calibration/buffer_entropy_50bins": 0.883147629124353,
"calibration/confidence_entropy": 0.4781765688390339,
"calibration/coverage@0%": 0.022265625,
"calibration/coverage@1%": 0.022265625,
"calibration/coverage@10%": 0.07578125,
"calibration/coverage@15%": 0.104296875,
"calibration/coverage@20%": 0.116015625,
"calibration/coverage@25%": 0.15,
"calibration/coverage@30%": 0.348046875,
"calibration/coverage@5%": 0.04375,
"calibration/ece": 0.13601852483138854,
"calibration/mean_confidence": 0.4368255651329841,
"calibration/prompt_uniqueness": 0.783056640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 709.0,
"completions/max_terminated_length": 491.6,
"completions/mean_length": 225.83916015625,
"completions/mean_terminated_length": 225.71083068847656,
"completions/min_length": 110.2,
"completions/min_terminated_length": 110.2,
"epoch": 0.992,
"grad_norm": 0.002593899378553033,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 1062148564.0,
"reward": 0.7919328927993774,
"reward_std": 0.06939845085144043,
"rewards/accuracy_reward": 0.5267578125,
"rewards/brier_reward": 0.7912951111793518,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0029651729855686426,
"rewards/frontier_coverage_1": 0.12737788558006286,
"rewards/frontier_coverage_10": 0.12737788558006286,
"rewards/frontier_coverage_15": 0.12562896311283112,
"rewards/frontier_coverage_20": 0.09472279995679855,
"rewards/frontier_coverage_25": 0.05767645165324211,
"rewards/frontier_coverage_5": 0.12737788558006286,
"rewards/frontier_ece_reward": 0.004302942892536521,
"rewards/frontier_entropy_batch_reward": -0.08593676090240479,
"signal/accuracy_reward/centered_abs_mean": 0.0891845703125,
"signal/accuracy_reward/group_bin_occupancy": 0.164453125,
"signal/accuracy_reward/group_std_mean": 0.11510567218065262,
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04459228515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04459228515625,
"signal/advantage_abs_mean": 0.05407559126615524,
"signal/advantage_pre_scale_abs_mean": 0.05407559126615524,
"signal/advantage_pre_scale_std": 0.09398611634969711,
"signal/advantage_std": 0.09398611634969711,
"signal/brier_reward/centered_abs_mean": 0.12326372712850571,
"signal/brier_reward/group_bin_occupancy": 0.836328125,
"signal/brier_reward/group_std_mean": 0.1573864758014679,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004590229969471693,
"signal/brier_reward/weight": 0.037239187955856325,
"signal/brier_reward/weighted_centered_abs_mean": 0.004590229969471693,
"signal/format_reward/centered_abs_mean": 0.0003662109375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.000768545875325799,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00018310546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002825642959214747,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.68984375,
"signal/frontier_aurc_reward/group_std_mean": 0.00474727526307106,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.240200782485772e-05,
"signal/frontier_aurc_reward/weight": 0.022084273025393487,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.240200782485772e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17649457454681397,
"signal/frontier_coverage_1/group_bin_occupancy": 0.868359375,
"signal/frontier_coverage_1/group_std_mean": 0.22425515353679656,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034664351958781483,
"signal/frontier_coverage_1/weight": 0.019641099125146867,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034664351958781483,
"signal/frontier_coverage_10/centered_abs_mean": 0.17649457454681397,
"signal/frontier_coverage_10/group_bin_occupancy": 0.868359375,
"signal/frontier_coverage_10/group_std_mean": 0.22425515353679656,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034664351958781483,
"signal/frontier_coverage_10/weight": 0.019641099125146867,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034664351958781483,
"signal/frontier_coverage_15/centered_abs_mean": 0.17462271451950073,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_15/group_std_mean": 0.22184259593486785,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034317933954298497,
"signal/frontier_coverage_15/weight": 0.019653279706835745,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034317933954298497,
"signal/frontier_coverage_20/centered_abs_mean": 0.11235518455505371,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_20/group_std_mean": 0.14412140250205993,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022537729702889918,
"signal/frontier_coverage_20/weight": 0.02005964070558548,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022537729702889918,
"signal/frontier_coverage_25/centered_abs_mean": 0.06445452049374581,
"signal/frontier_coverage_25/group_bin_occupancy": 0.908203125,
"signal/frontier_coverage_25/group_std_mean": 0.08258575052022935,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013269589049741625,
"signal/frontier_coverage_25/weight": 0.020587437599897385,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013269589049741625,
"signal/frontier_coverage_5/centered_abs_mean": 0.17649457454681397,
"signal/frontier_coverage_5/group_bin_occupancy": 0.868359375,
"signal/frontier_coverage_5/group_std_mean": 0.22425515353679656,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034664351958781483,
"signal/frontier_coverage_5/weight": 0.019641099125146867,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034664351958781483,
"signal/frontier_ece_reward/centered_abs_mean": 0.006545371748507023,
"signal/frontier_ece_reward/group_bin_occupancy": 0.694921875,
"signal/frontier_ece_reward/group_std_mean": 0.008297445718199015,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010008613695390522,
"signal/frontier_ece_reward/weight": 0.15291174948215486,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010008613695390522,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10476705580949783,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.50859375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1305326849222183,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017689590714871882,
"signal/frontier_entropy_batch_reward/weight": 0.16884112656116484,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017689590714871882,
"step": 310
},
{
"adaptive_ema/accuracy_reward": 0.5154819198209648,
"adaptive_ema/brier_reward": 0.7622238970963693,
"adaptive_ema/format_reward": 0.9835315309777346,
"adaptive_ema/frontier_aurc_reward": 0.012814884519278785,
"adaptive_ema/frontier_coverage_1": 0.12143998690674097,
"adaptive_ema/frontier_coverage_10": 0.12143998690674097,
"adaptive_ema/frontier_coverage_15": 0.12086318699406459,
"adaptive_ema/frontier_coverage_20": 0.10259387455745778,
"adaptive_ema/frontier_coverage_25": 0.07906396950440622,
"adaptive_ema/frontier_coverage_5": 0.12143998690674097,
"adaptive_ema/frontier_ece_reward": 0.021118504741223176,
"adaptive_ema/frontier_entropy_batch_reward": -0.07954886454452184,
"adaptive_weight/accuracy_reward": 0.5,
"adaptive_weight/brier_reward": 0.03715994581580162,
"adaptive_weight/format_reward": 0.5,
"adaptive_weight/frontier_aurc_reward": 0.022092683240771294,
"adaptive_weight/frontier_coverage_1": 0.01966171059757471,
"adaptive_weight/frontier_coverage_10": 0.01966171059757471,
"adaptive_weight/frontier_coverage_15": 0.019674619659781456,
"adaptive_weight/frontier_coverage_20": 0.020083477720618248,
"adaptive_weight/frontier_coverage_25": 0.020610064268112183,
"adaptive_weight/frontier_coverage_5": 0.01966171059757471,
"adaptive_weight/frontier_ece_reward": 0.15298082679510117,
"adaptive_weight/frontier_entropy_batch_reward": 0.1687132492661476,
"calibration/aurc": 0.3092712618845156,
"calibration/batch_distribution_entropy": 0.9647997895562282,
"calibration/batch_entropy_100bins": 0.842838417523208,
"calibration/batch_entropy_10bins": 0.9647997895562282,
"calibration/batch_entropy_50bins": 0.900232393020596,
"calibration/batch_uniqueness": 0.9379348754882812,
"calibration/buffer_distribution_entropy": 0.992895519242504,
"calibration/buffer_entropy_100bins": 0.8180349692082165,
"calibration/buffer_entropy_10bins": 0.992895519242504,
"calibration/buffer_entropy_50bins": 0.887059539018632,
"calibration/confidence_entropy": 0.48557998461300034,
"calibration/coverage@0%": 0.017578125,
"calibration/coverage@1%": 0.017578125,
"calibration/coverage@10%": 0.021484375,
"calibration/coverage@15%": 0.021484375,
"calibration/coverage@20%": 0.1484375,
"calibration/coverage@25%": 0.3115234375,
"calibration/coverage@30%": 0.591796875,
"calibration/coverage@5%": 0.021484375,
"calibration/ece": 0.17626870645936057,
"calibration/mean_confidence": 0.5701375435406395,
"calibration/prompt_uniqueness": 0.7572021484375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000244140625,
"completions/max_length": 984.0,
"completions/max_terminated_length": 444.5,
"completions/mean_length": 226.9221420288086,
"completions/mean_terminated_length": 226.60242462158203,
"completions/min_length": 111.5,
"completions/min_terminated_length": 111.5,
"epoch": 0.9984,
"num_tokens": 1069081719.0,
"reward": 0.7988417744636536,
"reward_std": 0.06897459551692009,
"rewards/accuracy_reward": 0.54345703125,
"rewards/brier_reward": 0.752604752779007,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0036660623736679554,
"rewards/frontier_coverage_1": 0.06480471789836884,
"rewards/frontier_coverage_10": 0.06480471789836884,
"rewards/frontier_coverage_15": 0.0636993870139122,
"rewards/frontier_coverage_20": 0.04550405964255333,
"rewards/frontier_coverage_25": 0.03341560810804367,
"rewards/frontier_coverage_5": 0.06480471789836884,
"rewards/frontier_ece_reward": 0.002690421766601503,
"rewards/frontier_entropy_batch_reward": -0.045069485902786255,
"signal/accuracy_reward/centered_abs_mean": 0.0859375,
"signal/accuracy_reward/group_bin_occupancy": 0.1669921875,
"signal/accuracy_reward/group_std_mean": 0.11501816660165787,
"signal/accuracy_reward/group_zero_std_frac": 0.6640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04296875,
"signal/advantage_abs_mean": 0.05269451253116131,
"signal/advantage_pre_scale_abs_mean": 0.05269451253116131,
"signal/advantage_pre_scale_std": 0.09441451355814934,
"signal/advantage_std": 0.09441451355814934,
"signal/brier_reward/centered_abs_mean": 0.13213703781366348,
"signal/brier_reward/group_bin_occupancy": 0.83984375,
"signal/brier_reward/group_std_mean": 0.16977553814649582,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004910205025225878,
"signal/brier_reward/weight": 0.03715994581580162,
"signal/brier_reward/weighted_centered_abs_mean": 0.004910205025225878,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036444071447476745,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6748046875,
"signal/frontier_aurc_reward/group_std_mean": 0.0065204238053411245,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.051491022342816e-05,
"signal/frontier_aurc_reward/weight": 0.022092683240771294,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.051491022342816e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.168254055082798,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8759765625,
"signal/frontier_coverage_1/group_std_mean": 0.21480195224285126,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033081687288358808,
"signal/frontier_coverage_1/weight": 0.01966171059757471,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033081687288358808,
"signal/frontier_coverage_10/centered_abs_mean": 0.168254055082798,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8759765625,
"signal/frontier_coverage_10/group_std_mean": 0.21480195224285126,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033081687288358808,
"signal/frontier_coverage_10/weight": 0.01966171059757471,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033081687288358808,
"signal/frontier_coverage_15/centered_abs_mean": 0.16302235424518585,
"signal/frontier_coverage_15/group_bin_occupancy": 0.875,
"signal/frontier_coverage_15/group_std_mean": 0.20827266573905945,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032074202317744493,
"signal/frontier_coverage_15/weight": 0.019674619659781456,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032074202317744493,
"signal/frontier_coverage_20/centered_abs_mean": 0.0979037694633007,
"signal/frontier_coverage_20/group_bin_occupancy": 0.875,
"signal/frontier_coverage_20/group_std_mean": 0.1266292929649353,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019662610720843077,
"signal/frontier_coverage_20/weight": 0.020083477720618248,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019662610720843077,
"signal/frontier_coverage_25/centered_abs_mean": 0.05765395425260067,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9013671875,
"signal/frontier_coverage_25/group_std_mean": 0.07497931271791458,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011882567778229713,
"signal/frontier_coverage_25/weight": 0.020610064268112183,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011882567778229713,
"signal/frontier_coverage_5/centered_abs_mean": 0.168254055082798,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8759765625,
"signal/frontier_coverage_5/group_std_mean": 0.21480195224285126,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033081687288358808,
"signal/frontier_coverage_5/weight": 0.01966171059757471,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033081687288358808,
"signal/frontier_ece_reward/centered_abs_mean": 0.006677088560536504,
"signal/frontier_ece_reward/group_bin_occupancy": 0.705078125,
"signal/frontier_ece_reward/group_std_mean": 0.008518182206898928,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010214661015197635,
"signal/frontier_ece_reward/weight": 0.15298082679510117,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010214661015197635,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07456976920366287,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.09598826617002487,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012581207789480686,
"signal/frontier_entropy_batch_reward/weight": 0.1687132492661476,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012581207789480686,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.0035973651800491214,
"train_runtime": 61836.3414,
"train_samples_per_second": 0.323,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1069081719,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}