{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "adaptive_ema/accuracy_reward": 0.23182790830371097, "adaptive_ema/brier_reward": 0.38261595560645834, "adaptive_ema/format_reward": 0.6731302296015625, "adaptive_ema/frontier_aurc_reward": 0.3120027589018625, "adaptive_ema/frontier_coverage_1": 0.3120027589018625, "adaptive_ema/frontier_coverage_10": 0.3120027589018625, "adaptive_ema/frontier_coverage_15": 0.3120027589018625, "adaptive_ema/frontier_coverage_20": 0.3120027589018625, "adaptive_ema/frontier_coverage_25": 0.3120027589018625, "adaptive_ema/frontier_coverage_5": 0.3120027589018625, "adaptive_ema/frontier_ece_reward": 0.3120027589018625, "adaptive_ema/frontier_entropy_batch_reward": -0.44369452121519914, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.08982320576906204, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.014333849586546422, "adaptive_weight/frontier_coverage_1": 0.014333849586546422, "adaptive_weight/frontier_coverage_10": 0.014333849586546422, "adaptive_weight/frontier_coverage_15": 0.014333849586546422, "adaptive_weight/frontier_coverage_20": 0.014333849586546422, "adaptive_weight/frontier_coverage_25": 0.014333849586546422, "adaptive_weight/frontier_coverage_5": 0.014333849586546422, "adaptive_weight/frontier_ece_reward": 0.1000967189669609, "adaptive_weight/frontier_entropy_batch_reward": 0.21004312336444855, "calibration/aurc": 0.6448292157088171, "calibration/batch_distribution_entropy": 0.6427565667944656, "calibration/batch_entropy_100bins": 0.4781411640735785, "calibration/batch_entropy_10bins": 0.6427565667944656, "calibration/batch_entropy_50bins": 0.5589868489931903, "calibration/batch_uniqueness": 0.7176582774738784, "calibration/confidence_entropy": 0.34260182991373694, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5034219005334309, "calibration/mean_confidence": 0.7952695676480407, "calibration/prompt_uniqueness": 0.5897596315268132, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03369140625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1512.8, "completions/mean_length": 268.30361328125, "completions/mean_terminated_length": 224.09957275390624, "completions/min_length": 1.8, "completions/min_terminated_length": 1.8, "epoch": 0.016, "grad_norm": 0.079594187438488, "learning_rate": 3.1249999999999997e-07, "loss": 0.0614, "num_tokens": 17591461.0, "reward": 0.4612198233604431, "reward_std": 0.38416741490364076, "rewards/accuracy_reward": 0.21767578125, "rewards/brier_reward": 0.37440124750137327, "rewards/format_reward": 0.68125, "rewards/frontier_aurc_reward": 0.3006488770246506, "rewards/frontier_coverage_1": 0.3006488770246506, "rewards/frontier_coverage_10": 0.3006488770246506, "rewards/frontier_coverage_15": 0.3006488770246506, "rewards/frontier_coverage_20": 0.3006488770246506, "rewards/frontier_coverage_25": 0.3006488770246506, "rewards/frontier_coverage_5": 0.3006488770246506, "rewards/frontier_ece_reward": 0.3006488770246506, "rewards/frontier_entropy_batch_reward": -0.45452544689178465, "signal/accuracy_reward/centered_abs_mean": 0.238604736328125, "signal/accuracy_reward/group_bin_occupancy": 0.21171875, "signal/accuracy_reward/group_std_mean": 0.28302725255489347, "signal/accuracy_reward/group_zero_std_frac": 0.30625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1193023681640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1193023681640625, "signal/advantage_abs_mean": 0.32754938006401063, "signal/advantage_pre_scale_abs_mean": 0.32754938006401063, "signal/advantage_pre_scale_std": 0.39201714396476744, "signal/advantage_std": 0.39201714396476744, "signal/brier_reward/centered_abs_mean": 0.318459290266037, "signal/brier_reward/group_bin_occupancy": 0.746484375, "signal/brier_reward/group_std_mean": 0.36360487937927244, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028604864701628686, "signal/brier_reward/weight": 0.08982320576906204, "signal/brier_reward/weighted_centered_abs_mean": 0.028604864701628686, "signal/format_reward/centered_abs_mean": 0.40308837890625, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.45328914523124697, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.201544189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.201544189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.2910795986652374, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6625, "signal/frontier_aurc_reward/group_std_mean": 0.3423191785812378, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_aurc_reward/weight": 0.014333849586546422, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_1/centered_abs_mean": 0.2910795986652374, "signal/frontier_coverage_1/group_bin_occupancy": 0.6625, "signal/frontier_coverage_1/group_std_mean": 0.3423191785812378, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_1/weight": 0.014333849586546422, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_10/centered_abs_mean": 0.2910795986652374, "signal/frontier_coverage_10/group_bin_occupancy": 0.6625, "signal/frontier_coverage_10/group_std_mean": 0.3423191785812378, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_10/weight": 0.014333849586546422, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_15/centered_abs_mean": 0.2910795986652374, "signal/frontier_coverage_15/group_bin_occupancy": 0.6625, "signal/frontier_coverage_15/group_std_mean": 0.3423191785812378, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_15/weight": 0.014333849586546422, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_20/centered_abs_mean": 0.2910795986652374, "signal/frontier_coverage_20/group_bin_occupancy": 0.6625, "signal/frontier_coverage_20/group_std_mean": 0.3423191785812378, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_20/weight": 0.014333849586546422, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_25/centered_abs_mean": 0.2910795986652374, "signal/frontier_coverage_25/group_bin_occupancy": 0.6625, "signal/frontier_coverage_25/group_std_mean": 0.3423191785812378, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_25/weight": 0.014333849586546422, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_5/centered_abs_mean": 0.2910795986652374, "signal/frontier_coverage_5/group_bin_occupancy": 0.6625, "signal/frontier_coverage_5/group_std_mean": 0.3423191785812378, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_coverage_5/weight": 0.014333849586546422, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041722475551068785, "signal/frontier_ece_reward/centered_abs_mean": 0.2910795986652374, "signal/frontier_ece_reward/group_bin_occupancy": 0.6625, "signal/frontier_ece_reward/group_std_mean": 0.3423191785812378, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02913580760359764, "signal/frontier_ece_reward/weight": 0.1000967189669609, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02913580760359764, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4769509553909302, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.434375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5222809553146363, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10018042176961899, "signal/frontier_entropy_batch_reward/weight": 0.21004312336444855, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.10018042176961899, "step": 5 }, { "adaptive_ema/accuracy_reward": 0.23042025751920087, "adaptive_ema/brier_reward": 0.3819111222332293, "adaptive_ema/format_reward": 0.6741255697854202, "adaptive_ema/frontier_aurc_reward": 0.31098182470547453, "adaptive_ema/frontier_coverage_1": 0.31098182470547453, "adaptive_ema/frontier_coverage_10": 0.31098182470547453, "adaptive_ema/frontier_coverage_15": 0.31098182470547453, "adaptive_ema/frontier_coverage_20": 0.31098182470547453, "adaptive_ema/frontier_coverage_25": 0.31098182470547453, "adaptive_ema/frontier_coverage_5": 0.31098182470547453, "adaptive_ema/frontier_ece_reward": 0.31098182470547453, "adaptive_ema/frontier_entropy_batch_reward": -0.44455297453697823, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.08983151316642761, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.014340075105428696, "adaptive_weight/frontier_coverage_1": 0.014340075105428696, "adaptive_weight/frontier_coverage_10": 0.014340075105428696, "adaptive_weight/frontier_coverage_15": 0.014340075105428696, "adaptive_weight/frontier_coverage_20": 0.014340075105428696, "adaptive_weight/frontier_coverage_25": 0.014340075105428696, "adaptive_weight/frontier_coverage_5": 0.014340075105428696, "adaptive_weight/frontier_ece_reward": 0.10014019310474395, "adaptive_weight/frontier_entropy_batch_reward": 0.2099477618932724, "calibration/aurc": 0.6860147125059216, "calibration/batch_distribution_entropy": 0.6500051016919194, "calibration/batch_entropy_100bins": 0.4773603444617879, "calibration/batch_entropy_10bins": 0.6500051016919194, "calibration/batch_entropy_50bins": 0.5612185893380846, "calibration/batch_uniqueness": 0.7166415776631634, "calibration/confidence_entropy": 0.34568163418670883, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5364662682352269, "calibration/mean_confidence": 0.7955750590240586, "calibration/prompt_uniqueness": 0.609550450226551, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 265.6697265625, "completions/mean_terminated_length": 214.0361083984375, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.03242058679461479, "learning_rate": 6.249999999999999e-07, "loss": 0.0646, "num_tokens": 35412271.0, "reward": 0.4505932092666626, "reward_std": 0.35865501761436464, "rewards/accuracy_reward": 0.20458984375, "rewards/brier_reward": 0.37263244986534116, "rewards/format_reward": 0.7111328125, "rewards/frontier_aurc_reward": 0.2930680811405182, "rewards/frontier_coverage_1": 0.2930680811405182, "rewards/frontier_coverage_10": 0.2930680811405182, "rewards/frontier_coverage_15": 0.2930680811405182, "rewards/frontier_coverage_20": 0.2930680811405182, "rewards/frontier_coverage_25": 0.2930680811405182, "rewards/frontier_coverage_5": 0.2930680811405182, "rewards/frontier_ece_reward": 0.2930680811405182, "rewards/frontier_entropy_batch_reward": -0.4739628076553345, "signal/accuracy_reward/centered_abs_mean": 0.218255615234375, "signal/accuracy_reward/group_bin_occupancy": 0.209765625, "signal/accuracy_reward/group_std_mean": 0.2656914174556732, "signal/accuracy_reward/group_zero_std_frac": 0.321875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1091278076171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1091278076171875, "signal/advantage_abs_mean": 0.3003044664859772, "signal/advantage_pre_scale_abs_mean": 0.3003044664859772, "signal/advantage_pre_scale_std": 0.3666377246379852, "signal/advantage_std": 0.3666377246379852, "signal/brier_reward/centered_abs_mean": 0.3034407377243042, "signal/brier_reward/group_bin_occupancy": 0.7671875, "signal/brier_reward/group_std_mean": 0.35142738819122316, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027258511632680893, "signal/brier_reward/weight": 0.08983151316642761, "signal/brier_reward/weighted_centered_abs_mean": 0.027258511632680893, "signal/format_reward/centered_abs_mean": 0.37913818359375, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.439016717672348, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.189569091796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.189569091796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.2739542663097382, "signal/frontier_aurc_reward/group_bin_occupancy": 0.666796875, "signal/frontier_aurc_reward/group_std_mean": 0.3285772979259491, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_aurc_reward/weight": 0.014340075105428696, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_1/centered_abs_mean": 0.2739542663097382, "signal/frontier_coverage_1/group_bin_occupancy": 0.666796875, "signal/frontier_coverage_1/group_std_mean": 0.3285772979259491, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_1/weight": 0.014340075105428696, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_10/centered_abs_mean": 0.2739542663097382, "signal/frontier_coverage_10/group_bin_occupancy": 0.666796875, "signal/frontier_coverage_10/group_std_mean": 0.3285772979259491, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_10/weight": 0.014340075105428696, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_15/centered_abs_mean": 0.2739542663097382, "signal/frontier_coverage_15/group_bin_occupancy": 0.666796875, "signal/frontier_coverage_15/group_std_mean": 0.3285772979259491, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_15/weight": 0.014340075105428696, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_20/centered_abs_mean": 0.2739542663097382, "signal/frontier_coverage_20/group_bin_occupancy": 0.666796875, "signal/frontier_coverage_20/group_std_mean": 0.3285772979259491, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_20/weight": 0.014340075105428696, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_25/centered_abs_mean": 0.2739542663097382, "signal/frontier_coverage_25/group_bin_occupancy": 0.666796875, "signal/frontier_coverage_25/group_std_mean": 0.3285772979259491, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_25/weight": 0.014340075105428696, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_5/centered_abs_mean": 0.2739542663097382, "signal/frontier_coverage_5/group_bin_occupancy": 0.666796875, "signal/frontier_coverage_5/group_std_mean": 0.3285772979259491, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_coverage_5/weight": 0.014340075105428696, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003928511589765549, "signal/frontier_ece_reward/centered_abs_mean": 0.2739542663097382, "signal/frontier_ece_reward/group_bin_occupancy": 0.666796875, "signal/frontier_ece_reward/group_std_mean": 0.3285772979259491, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.027433741465210916, "signal/frontier_ece_reward/weight": 0.10014019310474395, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.027433741465210916, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4785237729549408, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.441796875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5272870898246765, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10046486407518387, "signal/frontier_entropy_batch_reward/weight": 0.2099477618932724, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.10046486407518387, "step": 10 }, { "adaptive_ema/accuracy_reward": 0.2303851249087534, "adaptive_ema/brier_reward": 0.384100495225017, "adaptive_ema/format_reward": 0.6799472793515441, "adaptive_ema/frontier_aurc_reward": 0.31133432126378163, "adaptive_ema/frontier_coverage_1": 0.31152009604459147, "adaptive_ema/frontier_coverage_10": 0.31152009604459147, "adaptive_ema/frontier_coverage_15": 0.31152009604459147, "adaptive_ema/frontier_coverage_20": 0.31152009604459147, "adaptive_ema/frontier_coverage_25": 0.31152009604459147, "adaptive_ema/frontier_coverage_5": 0.31152009604459147, "adaptive_ema/frontier_ece_reward": 0.31121861117666105, "adaptive_ema/frontier_entropy_batch_reward": -0.44775436091818743, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.08950651288032532, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.014331639185547829, "adaptive_weight/frontier_coverage_1": 0.01432777550071478, "adaptive_weight/frontier_coverage_10": 0.01432777550071478, "adaptive_weight/frontier_coverage_15": 0.01432777550071478, "adaptive_weight/frontier_coverage_20": 0.01432777550071478, "adaptive_weight/frontier_coverage_25": 0.01432777550071478, "adaptive_weight/frontier_coverage_5": 0.01432777550071478, "adaptive_weight/frontier_ece_reward": 0.10009808540344238, "adaptive_weight/frontier_entropy_batch_reward": 0.21039710342884063, "calibration/aurc": 0.6049186984773813, "calibration/batch_distribution_entropy": 0.6647650329327803, "calibration/batch_entropy_100bins": 0.486494845315269, "calibration/batch_entropy_10bins": 0.6647650329327803, "calibration/batch_entropy_50bins": 0.5687127520068225, "calibration/batch_uniqueness": 0.7120603469709288, "calibration/buffer_distribution_entropy": 0.6659962612629504, "calibration/buffer_entropy_100bins": 0.49174012175065746, "calibration/buffer_entropy_10bins": 0.6659962612629504, "calibration/buffer_entropy_50bins": 0.5750999921475161, "calibration/confidence_entropy": 0.3532576119604358, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4695792380430694, "calibration/mean_confidence": 0.7954697931195405, "calibration/prompt_uniqueness": 0.623697074648137, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019921875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1450.8, "completions/mean_length": 212.9087890625, "completions/mean_terminated_length": 186.10792541503906, "completions/min_length": 11.8, "completions/min_terminated_length": 11.8, "epoch": 0.048, "grad_norm": 0.012215187773108482, "learning_rate": 9.374999999999999e-07, "loss": 0.0459, "num_tokens": 52641193.0, "reward": 0.5511215567588806, "reward_std": 0.3094262957572937, "rewards/accuracy_reward": 0.25849609375, "rewards/brier_reward": 0.4815207779407501, "rewards/format_reward": 0.86337890625, "rewards/frontier_aurc_reward": 0.292304290086031, "rewards/frontier_coverage_1": 0.3108817681670189, "rewards/frontier_coverage_10": 0.3108817681670189, "rewards/frontier_coverage_15": 0.3108817681670189, "rewards/frontier_coverage_20": 0.3108817681670189, "rewards/frontier_coverage_25": 0.3108817681670189, "rewards/frontier_coverage_5": 0.3108817681670189, "rewards/frontier_ece_reward": 0.2807332813739777, "rewards/frontier_entropy_batch_reward": -0.5326027274131775, "signal/accuracy_reward/centered_abs_mean": 0.201080322265625, "signal/accuracy_reward/group_bin_occupancy": 0.205078125, "signal/accuracy_reward/group_std_mean": 0.24860720038414003, "signal/accuracy_reward/group_zero_std_frac": 0.359375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1005401611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1005401611328125, "signal/advantage_abs_mean": 0.2522279918193817, "signal/advantage_pre_scale_abs_mean": 0.2522279918193817, "signal/advantage_pre_scale_std": 0.3164039790630341, "signal/advantage_std": 0.3164039790630341, "signal/brier_reward/centered_abs_mean": 0.28035444021224976, "signal/brier_reward/group_bin_occupancy": 0.812109375, "signal/brier_reward/group_std_mean": 0.3334766149520874, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025098316371440887, "signal/brier_reward/weight": 0.08950651288032532, "signal/brier_reward/weighted_centered_abs_mean": 0.025098316371440887, "signal/format_reward/centered_abs_mean": 0.218707275390625, "signal/format_reward/group_bin_occupancy": 0.244921875, "signal/format_reward/group_std_mean": 0.31805049777030947, "signal/format_reward/group_zero_std_frac": 0.040625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1093536376953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1093536376953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.22175134532153606, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7234375, "signal/frontier_aurc_reward/group_std_mean": 0.26504507940262556, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003176678024465218, "signal/frontier_aurc_reward/weight": 0.014331639185547829, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003176678024465218, "signal/frontier_coverage_1/centered_abs_mean": 0.24584266245365144, "signal/frontier_coverage_1/group_bin_occupancy": 0.716015625, "signal/frontier_coverage_1/group_std_mean": 0.3008487790822983, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_1/weight": 0.01432777550071478, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_10/centered_abs_mean": 0.24584266245365144, "signal/frontier_coverage_10/group_bin_occupancy": 0.716015625, "signal/frontier_coverage_10/group_std_mean": 0.3008487790822983, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_10/weight": 0.01432777550071478, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_15/centered_abs_mean": 0.24584266245365144, "signal/frontier_coverage_15/group_bin_occupancy": 0.716015625, "signal/frontier_coverage_15/group_std_mean": 0.3008487790822983, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_15/weight": 0.01432777550071478, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_20/centered_abs_mean": 0.24584266245365144, "signal/frontier_coverage_20/group_bin_occupancy": 0.716015625, "signal/frontier_coverage_20/group_std_mean": 0.3008487790822983, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_20/weight": 0.01432777550071478, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_25/centered_abs_mean": 0.24584266245365144, "signal/frontier_coverage_25/group_bin_occupancy": 0.716015625, "signal/frontier_coverage_25/group_std_mean": 0.3008487790822983, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_25/weight": 0.01432777550071478, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_5/centered_abs_mean": 0.24584266245365144, "signal/frontier_coverage_5/group_bin_occupancy": 0.716015625, "signal/frontier_coverage_5/group_std_mean": 0.3008487790822983, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_coverage_5/weight": 0.01432777550071478, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003522046422585845, "signal/frontier_ece_reward/centered_abs_mean": 0.24515217244625093, "signal/frontier_ece_reward/group_bin_occupancy": 0.716796875, "signal/frontier_ece_reward/group_std_mean": 0.294309064745903, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024531540647149085, "signal/frontier_ece_reward/weight": 0.10009808540344238, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024531540647149085, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4544231414794922, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5183358907699585, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.09560682773590087, "signal/frontier_entropy_batch_reward/weight": 0.21039710342884063, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.09560682773590087, "step": 15 }, { "adaptive_ema/accuracy_reward": 0.23426954152107396, "adaptive_ema/brier_reward": 0.39307066305031424, "adaptive_ema/format_reward": 0.6926847671353432, "adaptive_ema/frontier_aurc_reward": 0.300348126568556, "adaptive_ema/frontier_coverage_1": 0.30403770600747176, "adaptive_ema/frontier_coverage_10": 0.30403770600747176, "adaptive_ema/frontier_coverage_15": 0.30403770600747176, "adaptive_ema/frontier_coverage_20": 0.30403770600747176, "adaptive_ema/frontier_coverage_25": 0.30403770600747176, "adaptive_ema/frontier_coverage_5": 0.30403770600747176, "adaptive_ema/frontier_ece_reward": 0.29847388797844887, "adaptive_ema/frontier_entropy_batch_reward": -0.45138858030431095, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.08781057894229889, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.014495300687849521, "adaptive_weight/frontier_coverage_1": 0.014418897405266761, "adaptive_weight/frontier_coverage_10": 0.014418897405266761, "adaptive_weight/frontier_coverage_15": 0.014418897405266761, "adaptive_weight/frontier_coverage_20": 0.014418897405266761, "adaptive_weight/frontier_coverage_25": 0.014418897405266761, "adaptive_weight/frontier_coverage_5": 0.014418897405266761, "adaptive_weight/frontier_ece_reward": 0.10149525552988052, "adaptive_weight/frontier_entropy_batch_reward": 0.20998547673225404, "calibration/aurc": 0.5263760901597899, "calibration/batch_distribution_entropy": 0.7777311383989622, "calibration/batch_entropy_100bins": 0.5323331927952004, "calibration/batch_entropy_10bins": 0.7777311383989622, "calibration/batch_entropy_50bins": 0.6220234792840525, "calibration/batch_uniqueness": 0.7772333424506105, "calibration/buffer_distribution_entropy": 0.6780116671270415, "calibration/buffer_entropy_100bins": 0.4986571778459387, "calibration/buffer_entropy_10bins": 0.6780116671270415, "calibration/buffer_entropy_50bins": 0.5828922877202816, "calibration/confidence_entropy": 0.4182842284514491, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.30854319958051096, "calibration/mean_confidence": 0.7003253907676882, "calibration/prompt_uniqueness": 0.6911273436300276, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005078125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1414.8, "completions/mean_length": 154.23994140625, "completions/mean_terminated_length": 147.2019500732422, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.064, "grad_norm": 0.02356830984354019, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 69139010.0, "reward": 0.6089733481407166, "reward_std": 0.22602881789207457, "rewards/accuracy_reward": 0.33115234375, "rewards/brier_reward": 0.6209439873695374, "rewards/format_reward": 0.97041015625, "rewards/frontier_aurc_reward": -0.006396393664181233, "rewards/frontier_coverage_1": 0.09709094911813736, "rewards/frontier_coverage_10": 0.09709094911813736, "rewards/frontier_coverage_15": 0.09709094911813736, "rewards/frontier_coverage_20": 0.09709094911813736, "rewards/frontier_coverage_25": 0.09709094911813736, "rewards/frontier_coverage_5": 0.09709094911813736, "rewards/frontier_ece_reward": -0.046480930084362625, "rewards/frontier_entropy_batch_reward": -0.4765442192554474, "signal/accuracy_reward/centered_abs_mean": 0.205462646484375, "signal/accuracy_reward/group_bin_occupancy": 0.208203125, "signal/accuracy_reward/group_std_mean": 0.25680303275585176, "signal/accuracy_reward/group_zero_std_frac": 0.334375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1027313232421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1027313232421875, "signal/advantage_abs_mean": 0.17884572744369506, "signal/advantage_pre_scale_abs_mean": 0.17884572744369506, "signal/advantage_pre_scale_std": 0.2316117614507675, "signal/advantage_std": 0.2316117614507675, "signal/brier_reward/centered_abs_mean": 0.23778945803642274, "signal/brier_reward/group_bin_occupancy": 0.852734375, "signal/brier_reward/group_std_mean": 0.29429731965065004, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020891552045941352, "signal/brier_reward/weight": 0.08781057894229889, "signal/brier_reward/weighted_centered_abs_mean": 0.020891552045941352, "signal/format_reward/centered_abs_mean": 0.054486083984375, "signal/format_reward/group_bin_occupancy": 0.196484375, "signal/format_reward/group_std_mean": 0.1227356806397438, "signal/format_reward/group_zero_std_frac": 0.428125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0272430419921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0272430419921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.00433549745939672, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72109375, "signal/frontier_aurc_reward/group_std_mean": 0.006195350922644139, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.27727509709075e-05, "signal/frontier_aurc_reward/weight": 0.014495300687849521, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.27727509709075e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14411845207214355, "signal/frontier_coverage_1/group_bin_occupancy": 0.744140625, "signal/frontier_coverage_1/group_std_mean": 0.20870547592639924, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_1/weight": 0.014418897405266761, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_10/centered_abs_mean": 0.14411845207214355, "signal/frontier_coverage_10/group_bin_occupancy": 0.744140625, "signal/frontier_coverage_10/group_std_mean": 0.20870547592639924, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_10/weight": 0.014418897405266761, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_15/centered_abs_mean": 0.14411845207214355, "signal/frontier_coverage_15/group_bin_occupancy": 0.744140625, "signal/frontier_coverage_15/group_std_mean": 0.20870547592639924, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_15/weight": 0.014418897405266761, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_20/centered_abs_mean": 0.14411845207214355, "signal/frontier_coverage_20/group_bin_occupancy": 0.744140625, "signal/frontier_coverage_20/group_std_mean": 0.20870547592639924, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_20/weight": 0.014418897405266761, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_25/centered_abs_mean": 0.14411845207214355, "signal/frontier_coverage_25/group_bin_occupancy": 0.744140625, "signal/frontier_coverage_25/group_std_mean": 0.20870547592639924, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_25/weight": 0.014418897405266761, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_5/centered_abs_mean": 0.14411845207214355, "signal/frontier_coverage_5/group_bin_occupancy": 0.744140625, "signal/frontier_coverage_5/group_std_mean": 0.20870547592639924, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_coverage_5/weight": 0.014418897405266761, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002079009125009179, "signal/frontier_ece_reward/centered_abs_mean": 0.12881973385810852, "signal/frontier_ece_reward/group_bin_occupancy": 0.745703125, "signal/frontier_ece_reward/group_std_mean": 0.15685342252254486, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013068030402064324, "signal/frontier_ece_reward/weight": 0.10149525552988052, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013068030402064324, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4035856008529663, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.641796875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.48287264108657835, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0847533330321312, "signal/frontier_entropy_batch_reward/weight": 0.20998547673225404, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0847533330321312, "step": 20 }, { "adaptive_ema/accuracy_reward": 0.23955027289038525, "adaptive_ema/brier_reward": 0.4080646915669138, "adaptive_ema/format_reward": 0.707112215851047, "adaptive_ema/frontier_aurc_reward": 0.28535526680946016, "adaptive_ema/frontier_coverage_1": 0.29610780835781203, "adaptive_ema/frontier_coverage_10": 0.29610780835781203, "adaptive_ema/frontier_coverage_15": 0.29610780835781203, "adaptive_ema/frontier_coverage_20": 0.29610780835781203, "adaptive_ema/frontier_coverage_25": 0.29610780835781203, "adaptive_ema/frontier_coverage_5": 0.29610780835781203, "adaptive_ema/frontier_ece_reward": 0.28265504878692915, "adaptive_ema/frontier_entropy_batch_reward": -0.4445421524261969, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.08556736111640931, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.014793501235544682, "adaptive_weight/frontier_coverage_1": 0.01457088440656662, "adaptive_weight/frontier_coverage_10": 0.01457088440656662, "adaptive_weight/frontier_coverage_15": 0.01457088440656662, "adaptive_weight/frontier_coverage_20": 0.01457088440656662, "adaptive_weight/frontier_coverage_25": 0.01457088440656662, "adaptive_weight/frontier_coverage_5": 0.01457088440656662, "adaptive_weight/frontier_ece_reward": 0.10369691401720046, "adaptive_weight/frontier_entropy_batch_reward": 0.20881690979003906, "calibration/aurc": 0.6171283887858363, "calibration/batch_distribution_entropy": 0.8674204051435778, "calibration/batch_entropy_100bins": 0.5776314716132508, "calibration/batch_entropy_10bins": 0.8674204051435778, "calibration/batch_entropy_50bins": 0.6761122944661062, "calibration/batch_uniqueness": 0.8069641110167801, "calibration/buffer_distribution_entropy": 0.7525090521356054, "calibration/buffer_entropy_100bins": 0.5323303585319001, "calibration/buffer_entropy_10bins": 0.7525090521356054, "calibration/buffer_entropy_50bins": 0.622013500945575, "calibration/confidence_entropy": 0.503978766149048, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2100240513244424, "calibration/mean_confidence": 0.47824112817970243, "calibration/prompt_uniqueness": 0.7171275138018345, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00146484375, "completions/max_length": 1536.0, "completions/max_terminated_length": 901.6, "completions/mean_length": 130.5357421875, "completions/mean_terminated_length": 128.4743423461914, "completions/min_length": 21.8, "completions/min_terminated_length": 21.8, "epoch": 0.08, "grad_norm": 0.02094282954931259, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 85408848.0, "reward": 0.6991242527961731, "reward_std": 0.17349291741847991, "rewards/accuracy_reward": 0.34111328125, "rewards/brier_reward": 0.7374354720115661, "rewards/format_reward": 0.9921875, "rewards/frontier_aurc_reward": -0.005245448090136051, "rewards/frontier_coverage_1": 0.17646725475788116, "rewards/frontier_coverage_10": 0.17646725475788116, "rewards/frontier_coverage_15": 0.17646725475788116, "rewards/frontier_coverage_20": 0.17646725475788116, "rewards/frontier_coverage_25": 0.17646725475788116, "rewards/frontier_coverage_5": 0.17646725475788116, "rewards/frontier_ece_reward": -0.01268238362390548, "rewards/frontier_entropy_batch_reward": -0.21498993635177613, "signal/accuracy_reward/centered_abs_mean": 0.190802001953125, "signal/accuracy_reward/group_bin_occupancy": 0.204296875, "signal/accuracy_reward/group_std_mean": 0.24003153443336486, "signal/accuracy_reward/group_zero_std_frac": 0.365625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0954010009765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0954010009765625, "signal/advantage_abs_mean": 0.13710006028413774, "signal/advantage_pre_scale_abs_mean": 0.13710006028413774, "signal/advantage_pre_scale_std": 0.1836077630519867, "signal/advantage_std": 0.1836077630519867, "signal/brier_reward/centered_abs_mean": 0.18633546233177184, "signal/brier_reward/group_bin_occupancy": 0.838671875, "signal/brier_reward/group_std_mean": 0.24008690118789672, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015948853641748428, "signal/brier_reward/weight": 0.08556736111640931, "signal/brier_reward/weighted_centered_abs_mean": 0.015948853641748428, "signal/format_reward/centered_abs_mean": 0.01419677734375, "signal/format_reward/group_bin_occupancy": 0.147265625, "signal/format_reward/group_std_mean": 0.034811738133430484, "signal/format_reward/group_zero_std_frac": 0.821875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007098388671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007098388671875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022644552402198313, "signal/frontier_aurc_reward/group_bin_occupancy": 0.683984375, "signal/frontier_aurc_reward/group_std_mean": 0.003705321438610554, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3452448406023905e-05, "signal/frontier_aurc_reward/weight": 0.014793501235544682, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3452448406023905e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2472827762365341, "signal/frontier_coverage_1/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_1/group_std_mean": 0.3209026396274567, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_1/weight": 0.01457088440656662, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_10/centered_abs_mean": 0.2472827762365341, "signal/frontier_coverage_10/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_10/group_std_mean": 0.3209026396274567, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_10/weight": 0.01457088440656662, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_15/centered_abs_mean": 0.2472827762365341, "signal/frontier_coverage_15/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_15/group_std_mean": 0.3209026396274567, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_15/weight": 0.01457088440656662, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_20/centered_abs_mean": 0.2472827762365341, "signal/frontier_coverage_20/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_20/group_std_mean": 0.3209026396274567, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_20/weight": 0.01457088440656662, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_25/centered_abs_mean": 0.2472827762365341, "signal/frontier_coverage_25/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_25/group_std_mean": 0.3209026396274567, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_25/weight": 0.01457088440656662, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_5/centered_abs_mean": 0.2472827762365341, "signal/frontier_coverage_5/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_5/group_std_mean": 0.3209026396274567, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_coverage_5/weight": 0.01457088440656662, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003604399133473635, "signal/frontier_ece_reward/centered_abs_mean": 0.08847985863685608, "signal/frontier_ece_reward/group_bin_occupancy": 0.775, "signal/frontier_ece_reward/group_std_mean": 0.11089985370635987, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009167110547423363, "signal/frontier_ece_reward/weight": 0.10369691401720046, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009167110547423363, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3149270832538605, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.592578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3907872557640076, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06577974408864976, "signal/frontier_entropy_batch_reward/weight": 0.20881690979003906, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.06577974408864976, "step": 25 }, { "adaptive_ema/accuracy_reward": 0.24428599930218375, "adaptive_ema/brier_reward": 0.42536614088638436, "adaptive_ema/format_reward": 0.7211933273616375, "adaptive_ema/frontier_aurc_reward": 0.27112399060683245, "adaptive_ema/frontier_coverage_1": 0.2919919365600858, "adaptive_ema/frontier_coverage_10": 0.2919919365600858, "adaptive_ema/frontier_coverage_15": 0.2919919365600858, "adaptive_ema/frontier_coverage_20": 0.2919919365600858, "adaptive_ema/frontier_coverage_25": 0.2919919365600858, "adaptive_ema/frontier_coverage_5": 0.2919919365600858, "adaptive_ema/frontier_ece_reward": 0.2685948928242548, "adaptive_ema/frontier_entropy_batch_reward": -0.43403338201474606, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.08326314240694047, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.015123799070715904, "adaptive_weight/frontier_coverage_1": 0.014690774492919445, "adaptive_weight/frontier_coverage_10": 0.014690774492919445, "adaptive_weight/frontier_coverage_15": 0.014690774492919445, "adaptive_weight/frontier_coverage_20": 0.014690774492919445, "adaptive_weight/frontier_coverage_25": 0.014690774492919445, "adaptive_weight/frontier_coverage_5": 0.014690774492919445, "adaptive_weight/frontier_ece_reward": 0.10597957968711853, "adaptive_weight/frontier_entropy_batch_reward": 0.20778882503509521, "calibration/aurc": 0.6043924761417212, "calibration/batch_distribution_entropy": 0.8436481433817444, "calibration/batch_entropy_100bins": 0.6037587687908283, "calibration/batch_entropy_10bins": 0.8436481433817444, "calibration/batch_entropy_50bins": 0.6966506318827843, "calibration/batch_uniqueness": 0.827812804990921, "calibration/buffer_distribution_entropy": 0.8374224803093785, "calibration/buffer_entropy_100bins": 0.5783989963531001, "calibration/buffer_entropy_10bins": 0.8374224803093785, "calibration/buffer_entropy_50bins": 0.6750286362079907, "calibration/confidence_entropy": 0.4849612253994723, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.005893160413771808, "calibration/coverage@20%": 0.008649860727033207, "calibration/coverage@25%": 0.012989111220129853, "calibration/coverage@30%": 0.016926119094145604, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1533266415432468, "calibration/mean_confidence": 0.3279998444013065, "calibration/prompt_uniqueness": 0.7420654654801133, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 1536.0, "completions/max_terminated_length": 695.4, "completions/mean_length": 132.56982421875, "completions/mean_terminated_length": 131.06031799316406, "completions/min_length": 36.8, "completions/min_terminated_length": 36.8, "epoch": 0.096, "grad_norm": 0.0061478931456804276, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 101810971.0, "reward": 0.6923112273216248, "reward_std": 0.15285933017730713, "rewards/accuracy_reward": 0.3416015625, "rewards/brier_reward": 0.7662726521492005, "rewards/format_reward": 0.99560546875, "rewards/frontier_aurc_reward": -0.004837475996464491, "rewards/frontier_coverage_1": 0.2223384290933609, "rewards/frontier_coverage_10": 0.2223384290933609, "rewards/frontier_coverage_15": 0.2223384290933609, "rewards/frontier_coverage_20": 0.2223384290933609, "rewards/frontier_coverage_25": 0.2223384290933609, "rewards/frontier_coverage_5": 0.2223384290933609, "rewards/frontier_ece_reward": 6.405212916433811e-05, "rewards/frontier_entropy_batch_reward": -0.288457790017128, "signal/accuracy_reward/centered_abs_mean": 0.18267822265625, "signal/accuracy_reward/group_bin_occupancy": 0.203515625, "signal/accuracy_reward/group_std_mean": 0.23303787410259247, "signal/accuracy_reward/group_zero_std_frac": 0.371875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.091339111328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.091339111328125, "signal/advantage_abs_mean": 0.12113873660564423, "signal/advantage_pre_scale_abs_mean": 0.12113873660564423, "signal/advantage_pre_scale_std": 0.16447269320487976, "signal/advantage_std": 0.16447269320487976, "signal/brier_reward/centered_abs_mean": 0.17132785618305207, "signal/brier_reward/group_bin_occupancy": 0.8296875, "signal/brier_reward/group_std_mean": 0.22054792940616608, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01426246203482151, "signal/brier_reward/weight": 0.08326314240694047, "signal/brier_reward/weighted_centered_abs_mean": 0.01426246203482151, "signal/format_reward/centered_abs_mean": 0.008465576171875, "signal/format_reward/group_bin_occupancy": 0.141015625, "signal/format_reward/group_std_mean": 0.02351398840546608, "signal/format_reward/group_zero_std_frac": 0.871875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0042327880859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0042327880859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015173472464084625, "signal/frontier_aurc_reward/group_bin_occupancy": 0.683203125, "signal/frontier_aurc_reward/group_std_mean": 0.002604399994015694, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.293462930538226e-05, "signal/frontier_aurc_reward/weight": 0.015123799070715904, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.293462930538226e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2993292808532715, "signal/frontier_coverage_1/group_bin_occupancy": 0.9125, "signal/frontier_coverage_1/group_std_mean": 0.3749782383441925, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_1/weight": 0.014690774492919445, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_10/centered_abs_mean": 0.2993292808532715, "signal/frontier_coverage_10/group_bin_occupancy": 0.9125, "signal/frontier_coverage_10/group_std_mean": 0.3749782383441925, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_10/weight": 0.014690774492919445, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_15/centered_abs_mean": 0.2993292808532715, "signal/frontier_coverage_15/group_bin_occupancy": 0.9125, "signal/frontier_coverage_15/group_std_mean": 0.3749782383441925, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_15/weight": 0.014690774492919445, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_20/centered_abs_mean": 0.2993292808532715, "signal/frontier_coverage_20/group_bin_occupancy": 0.9125, "signal/frontier_coverage_20/group_std_mean": 0.3749782383441925, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_20/weight": 0.014690774492919445, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_25/centered_abs_mean": 0.2993292808532715, "signal/frontier_coverage_25/group_bin_occupancy": 0.9125, "signal/frontier_coverage_25/group_std_mean": 0.3749782383441925, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_25/weight": 0.014690774492919445, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_5/centered_abs_mean": 0.2993292808532715, "signal/frontier_coverage_5/group_bin_occupancy": 0.9125, "signal/frontier_coverage_5/group_std_mean": 0.3749782383441925, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_coverage_5/weight": 0.014690774492919445, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0043978951405733826, "signal/frontier_ece_reward/centered_abs_mean": 0.053389621526002885, "signal/frontier_ece_reward/group_bin_occupancy": 0.63359375, "signal/frontier_ece_reward/group_std_mean": 0.07536848038434982, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0056538769975304605, "signal/frontier_ece_reward/weight": 0.10597957968711853, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0056538769975304605, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35766748189926145, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.645703125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4165478765964508, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.07431503981351853, "signal/frontier_entropy_batch_reward/weight": 0.20778882503509521, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.07431503981351853, "step": 30 }, { "adaptive_ema/accuracy_reward": 0.24984681717517115, "adaptive_ema/brier_reward": 0.4419331664070724, "adaptive_ema/format_reward": 0.7346662711486284, "adaptive_ema/frontier_aurc_reward": 0.2576118949172257, "adaptive_ema/frontier_coverage_1": 0.2884707104423859, "adaptive_ema/frontier_coverage_10": 0.2884707104423859, "adaptive_ema/frontier_coverage_15": 0.2884707104423859, "adaptive_ema/frontier_coverage_20": 0.2884707104423859, "adaptive_ema/frontier_coverage_25": 0.2884707104423859, "adaptive_ema/frontier_coverage_5": 0.2884707104423859, "adaptive_ema/frontier_ece_reward": 0.2556223252489979, "adaptive_ema/frontier_entropy_batch_reward": -0.4271526013679908, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.080991829931736, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.015428835526108742, "adaptive_weight/frontier_coverage_1": 0.014787467941641808, "adaptive_weight/frontier_coverage_10": 0.014787467941641808, "adaptive_weight/frontier_coverage_15": 0.014787467941641808, "adaptive_weight/frontier_coverage_20": 0.014787467941641808, "adaptive_weight/frontier_coverage_25": 0.014787467941641808, "adaptive_weight/frontier_coverage_5": 0.014787467941641808, "adaptive_weight/frontier_ece_reward": 0.10803199261426925, "adaptive_weight/frontier_entropy_batch_reward": 0.20712252855300903, "calibration/aurc": 0.47325860188954866, "calibration/batch_distribution_entropy": 0.861042317416165, "calibration/batch_entropy_100bins": 0.6235645062258767, "calibration/batch_entropy_10bins": 0.861042317416165, "calibration/batch_entropy_50bins": 0.7179239308446073, "calibration/batch_uniqueness": 0.8427199272494017, "calibration/buffer_distribution_entropy": 0.8889694652509454, "calibration/buffer_entropy_100bins": 0.6126072202975525, "calibration/buffer_entropy_10bins": 0.8889694652509454, "calibration/buffer_entropy_50bins": 0.7119972466901847, "calibration/confidence_entropy": 0.48597315663886775, "calibration/coverage@0%": 0.002734375, "calibration/coverage@1%": 0.002734375, "calibration/coverage@10%": 0.002734375, "calibration/coverage@15%": 0.002734375, "calibration/coverage@20%": 0.0046875, "calibration/coverage@25%": 0.034375, "calibration/coverage@30%": 0.058203125, "calibration/coverage@5%": 0.002734375, "calibration/ece": 0.14211889568254338, "calibration/mean_confidence": 0.34548752300222507, "calibration/prompt_uniqueness": 0.7481377735594432, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1094.4, "completions/max_terminated_length": 845.4, "completions/mean_length": 139.4576171875, "completions/mean_terminated_length": 138.77519836425782, "completions/min_length": 26.8, "completions/min_terminated_length": 26.8, "epoch": 0.112, "grad_norm": 0.003297725459560752, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 118348489.0, "reward": 0.7215077996253967, "reward_std": 0.13930552899837495, "rewards/accuracy_reward": 0.383984375, "rewards/brier_reward": 0.761021614074707, "rewards/format_reward": 0.9966796875, "rewards/frontier_aurc_reward": -0.004353551845997572, "rewards/frontier_coverage_1": 0.2021395444869995, "rewards/frontier_coverage_10": 0.2021395444869995, "rewards/frontier_coverage_15": 0.2021395444869995, "rewards/frontier_coverage_20": 0.2021395444869995, "rewards/frontier_coverage_25": 0.2021395444869995, "rewards/frontier_coverage_5": 0.2021395444869995, "rewards/frontier_ece_reward": 0.007823611074127258, "rewards/frontier_entropy_batch_reward": -0.23856934309005737, "signal/accuracy_reward/centered_abs_mean": 0.18026123046875, "signal/accuracy_reward/group_bin_occupancy": 0.201171875, "signal/accuracy_reward/group_std_mean": 0.22736807763576508, "signal/accuracy_reward/group_zero_std_frac": 0.390625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.090130615234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.090130615234375, "signal/advantage_abs_mean": 0.11153837889432908, "signal/advantage_pre_scale_abs_mean": 0.11153837889432908, "signal/advantage_pre_scale_std": 0.1530650556087494, "signal/advantage_std": 0.1530650556087494, "signal/brier_reward/centered_abs_mean": 0.17593652307987212, "signal/brier_reward/group_bin_occupancy": 0.832421875, "signal/brier_reward/group_std_mean": 0.22441621124744415, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014248938858509063, "signal/brier_reward/weight": 0.080991829931736, "signal/brier_reward/weighted_centered_abs_mean": 0.014248938858509063, "signal/format_reward/centered_abs_mean": 0.0049560546875, "signal/format_reward/group_bin_occupancy": 0.13203125, "signal/format_reward/group_std_mean": 0.011194882122799754, "signal/format_reward/group_zero_std_frac": 0.94375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00247802734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00247802734375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001741332351230085, "signal/frontier_aurc_reward/group_bin_occupancy": 0.715234375, "signal/frontier_aurc_reward/group_std_mean": 0.0029535184148699045, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6885440820478835e-05, "signal/frontier_aurc_reward/weight": 0.015428835526108742, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6885440820478835e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.30704258680343627, "signal/frontier_coverage_1/group_bin_occupancy": 0.90625, "signal/frontier_coverage_1/group_std_mean": 0.3823031187057495, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_1/weight": 0.014787467941641808, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_10/centered_abs_mean": 0.30704258680343627, "signal/frontier_coverage_10/group_bin_occupancy": 0.90625, "signal/frontier_coverage_10/group_std_mean": 0.3823031187057495, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_10/weight": 0.014787467941641808, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_15/centered_abs_mean": 0.30704258680343627, "signal/frontier_coverage_15/group_bin_occupancy": 0.90625, "signal/frontier_coverage_15/group_std_mean": 0.3823031187057495, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_15/weight": 0.014787467941641808, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_20/centered_abs_mean": 0.30704258680343627, "signal/frontier_coverage_20/group_bin_occupancy": 0.90625, "signal/frontier_coverage_20/group_std_mean": 0.3823031187057495, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_20/weight": 0.014787467941641808, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_25/centered_abs_mean": 0.30704258680343627, "signal/frontier_coverage_25/group_bin_occupancy": 0.90625, "signal/frontier_coverage_25/group_std_mean": 0.3823031187057495, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_25/weight": 0.014787467941641808, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_5/centered_abs_mean": 0.30704258680343627, "signal/frontier_coverage_5/group_bin_occupancy": 0.90625, "signal/frontier_coverage_5/group_std_mean": 0.3823031187057495, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_coverage_5/weight": 0.014787467941641808, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00453977920114994, "signal/frontier_ece_reward/centered_abs_mean": 0.04829949140548706, "signal/frontier_ece_reward/group_bin_occupancy": 0.6453125, "signal/frontier_ece_reward/group_std_mean": 0.06934612393379211, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005220386572182179, "signal/frontier_ece_reward/weight": 0.10803199261426925, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005220386572182179, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29004823267459867, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.658984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3490027576684952, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06009818613529205, "signal/frontier_entropy_batch_reward/weight": 0.20712252855300903, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.06009818613529205, "step": 35 }, { "adaptive_ema/accuracy_reward": 0.25757596134861205, "adaptive_ema/brier_reward": 0.4572738365559509, "adaptive_ema/format_reward": 0.7475636501642173, "adaptive_ema/frontier_aurc_reward": 0.2447767752529431, "adaptive_ema/frontier_coverage_1": 0.2823296902337432, "adaptive_ema/frontier_coverage_10": 0.2823296902337432, "adaptive_ema/frontier_coverage_15": 0.2823296902337432, "adaptive_ema/frontier_coverage_20": 0.2823296902337432, "adaptive_ema/frontier_coverage_25": 0.2823296902337432, "adaptive_ema/frontier_coverage_5": 0.2823296902337432, "adaptive_ema/frontier_ece_reward": 0.24354919909770872, "adaptive_ema/frontier_entropy_batch_reward": -0.4113240754585915, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.07904001474380493, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.015750362537801266, "adaptive_weight/frontier_coverage_1": 0.014967151544988155, "adaptive_weight/frontier_coverage_10": 0.014967151544988155, "adaptive_weight/frontier_coverage_15": 0.014967151544988155, "adaptive_weight/frontier_coverage_20": 0.014967151544988155, "adaptive_weight/frontier_coverage_25": 0.014967151544988155, "adaptive_weight/frontier_coverage_5": 0.014967151544988155, "adaptive_weight/frontier_ece_reward": 0.11016732156276703, "adaptive_weight/frontier_entropy_batch_reward": 0.20553938448429107, "calibration/aurc": 0.4955706475359262, "calibration/batch_distribution_entropy": 0.9012512573648932, "calibration/batch_entropy_100bins": 0.6357494389080675, "calibration/batch_entropy_10bins": 0.9012512573648932, "calibration/batch_entropy_50bins": 0.7350456436079122, "calibration/batch_uniqueness": 0.858147530472006, "calibration/buffer_distribution_entropy": 0.9099325834432286, "calibration/buffer_entropy_100bins": 0.6295079060520901, "calibration/buffer_entropy_10bins": 0.9099325834432286, "calibration/buffer_entropy_50bins": 0.7297726062982012, "calibration/confidence_entropy": 0.5300262596907609, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.010176125244618394, "calibration/coverage@15%": 0.010176125244618394, "calibration/coverage@20%": 0.018395303326810174, "calibration/coverage@25%": 0.041487279843444226, "calibration/coverage@30%": 0.05870841487279843, "calibration/coverage@5%": 0.0, "calibration/ece": 0.13868593730009668, "calibration/mean_confidence": 0.44378032542648127, "calibration/prompt_uniqueness": 0.7742377528290844, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1163.2, "completions/max_terminated_length": 522.4, "completions/mean_length": 147.51142578125, "completions/mean_terminated_length": 146.96887817382813, "completions/min_length": 45.2, "completions/min_terminated_length": 45.2, "epoch": 0.128, "grad_norm": 0.0016533477464690804, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 134775678.0, "reward": 0.7621492624282837, "reward_std": 0.1226007416844368, "rewards/accuracy_reward": 0.400390625, "rewards/brier_reward": 0.7549683094024658, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.004399614129215479, "rewards/frontier_coverage_1": 0.15388748794794083, "rewards/frontier_coverage_10": 0.15388748794794083, "rewards/frontier_coverage_15": 0.15388748794794083, "rewards/frontier_coverage_20": 0.15388748794794083, "rewards/frontier_coverage_25": 0.15388748794794083, "rewards/frontier_coverage_5": 0.15388748794794083, "rewards/frontier_ece_reward": 0.007644195389002561, "rewards/frontier_entropy_batch_reward": -0.057697060704231265, "signal/accuracy_reward/centered_abs_mean": 0.15521240234375, "signal/accuracy_reward/group_bin_occupancy": 0.19296875, "signal/accuracy_reward/group_std_mean": 0.19830750823020935, "signal/accuracy_reward/group_zero_std_frac": 0.45625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.077606201171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.077606201171875, "signal/advantage_abs_mean": 0.09616784155368804, "signal/advantage_pre_scale_abs_mean": 0.09616784155368804, "signal/advantage_pre_scale_std": 0.13922219276428222, "signal/advantage_std": 0.13922219276428222, "signal/brier_reward/centered_abs_mean": 0.17729856967926025, "signal/brier_reward/group_bin_occupancy": 0.86796875, "signal/brier_reward/group_std_mean": 0.22361719012260436, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014017242379486561, "signal/brier_reward/weight": 0.07904001474380493, "signal/brier_reward/weighted_centered_abs_mean": 0.014017242379486561, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_bin_occupancy": 0.13046875, "signal/format_reward/group_std_mean": 0.0077339802403002976, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002261793240904808, "signal/frontier_aurc_reward/group_bin_occupancy": 0.735546875, "signal/frontier_aurc_reward/group_std_mean": 0.0035905469208955764, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5628071054816245e-05, "signal/frontier_aurc_reward/weight": 0.015750362537801266, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5628071054816245e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.25459115505218505, "signal/frontier_coverage_1/group_bin_occupancy": 0.90859375, "signal/frontier_coverage_1/group_std_mean": 0.3230483055114746, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_1/weight": 0.014967151544988155, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_10/centered_abs_mean": 0.25459115505218505, "signal/frontier_coverage_10/group_bin_occupancy": 0.90859375, "signal/frontier_coverage_10/group_std_mean": 0.3230483055114746, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_10/weight": 0.014967151544988155, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_15/centered_abs_mean": 0.25459115505218505, "signal/frontier_coverage_15/group_bin_occupancy": 0.90859375, "signal/frontier_coverage_15/group_std_mean": 0.3230483055114746, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_15/weight": 0.014967151544988155, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_20/centered_abs_mean": 0.25459115505218505, "signal/frontier_coverage_20/group_bin_occupancy": 0.90859375, "signal/frontier_coverage_20/group_std_mean": 0.3230483055114746, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_20/weight": 0.014967151544988155, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_25/centered_abs_mean": 0.25459115505218505, "signal/frontier_coverage_25/group_bin_occupancy": 0.90859375, "signal/frontier_coverage_25/group_std_mean": 0.3230483055114746, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_25/weight": 0.014967151544988155, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_5/centered_abs_mean": 0.25459115505218505, "signal/frontier_coverage_5/group_bin_occupancy": 0.90859375, "signal/frontier_coverage_5/group_std_mean": 0.3230483055114746, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_coverage_5/weight": 0.014967151544988155, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038090549409389497, "signal/frontier_ece_reward/centered_abs_mean": 0.06426123976707458, "signal/frontier_ece_reward/group_bin_occupancy": 0.73671875, "signal/frontier_ece_reward/group_std_mean": 0.08365523815155029, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007080427370965481, "signal/frontier_ece_reward/weight": 0.11016732156276703, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007080427370965481, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12160103470087051, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.61484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1600523829460144, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024980336800217627, "signal/frontier_entropy_batch_reward/weight": 0.20553938448429107, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024980336800217627, "step": 40 }, { "adaptive_ema/accuracy_reward": 0.2671619133001659, "adaptive_ema/brier_reward": 0.4718947123865959, "adaptive_ema/format_reward": 0.7598753433415034, "adaptive_ema/frontier_aurc_reward": 0.2325843612368374, "adaptive_ema/frontier_coverage_1": 0.27418200755577676, "adaptive_ema/frontier_coverage_10": 0.27418200755577676, "adaptive_ema/frontier_coverage_15": 0.27418200755577676, "adaptive_ema/frontier_coverage_20": 0.27418200755577676, "adaptive_ema/frontier_coverage_25": 0.27418200755577676, "adaptive_ema/frontier_coverage_5": 0.27418200755577676, "adaptive_ema/frontier_ece_reward": 0.23242151496023716, "adaptive_ema/frontier_entropy_batch_reward": -0.39476656485279743, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.07716480493545533, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01605743505060673, "adaptive_weight/frontier_coverage_1": 0.015187039040029049, "adaptive_weight/frontier_coverage_10": 0.015187039040029049, "adaptive_weight/frontier_coverage_15": 0.015187039040029049, "adaptive_weight/frontier_coverage_20": 0.015187039040029049, "adaptive_weight/frontier_coverage_25": 0.015187039040029049, "adaptive_weight/frontier_coverage_5": 0.015187039040029049, "adaptive_weight/frontier_ece_reward": 0.11215667873620987, "adaptive_weight/frontier_entropy_batch_reward": 0.20379884541034698, "calibration/aurc": 0.3362157821524915, "calibration/batch_distribution_entropy": 0.9216198855891935, "calibration/batch_entropy_100bins": 0.6318047884404534, "calibration/batch_entropy_10bins": 0.9216198855891935, "calibration/batch_entropy_50bins": 0.7343137477556684, "calibration/batch_uniqueness": 0.8617429476204027, "calibration/buffer_distribution_entropy": 0.9184048720902066, "calibration/buffer_entropy_100bins": 0.6374289092099044, "calibration/buffer_entropy_10bins": 0.9184048720902066, "calibration/buffer_entropy_50bins": 0.7380459109803228, "calibration/confidence_entropy": 0.5135767288905108, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.029296875, "calibration/coverage@15%": 0.1109375, "calibration/coverage@20%": 0.17307057240704501, "calibration/coverage@25%": 0.2977655638454012, "calibration/coverage@30%": 0.4256421232876712, "calibration/coverage@5%": 0.019140625, "calibration/ece": 0.14798410352513824, "calibration/mean_confidence": 0.5187316618199427, "calibration/prompt_uniqueness": 0.7675874974267695, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1004.4, "completions/max_terminated_length": 751.0, "completions/mean_length": 154.86259765625, "completions/mean_terminated_length": 154.59256896972656, "completions/min_length": 51.8, "completions/min_terminated_length": 51.8, "epoch": 0.144, "grad_norm": 0.0019156603375449777, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 151311903.0, "reward": 0.8003306746482849, "reward_std": 0.12828224003314972, "rewards/accuracy_reward": 0.49716796875, "rewards/brier_reward": 0.7543691277503968, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.0035826864186674356, "rewards/frontier_coverage_1": 0.08718824163079261, "rewards/frontier_coverage_10": 0.08718824163079261, "rewards/frontier_coverage_15": 0.08718824163079261, "rewards/frontier_coverage_20": 0.08718824163079261, "rewards/frontier_coverage_25": 0.08718824163079261, "rewards/frontier_coverage_5": 0.08718824163079261, "rewards/frontier_ece_reward": 0.02351235654205084, "rewards/frontier_entropy_batch_reward": -0.08158636391162873, "signal/accuracy_reward/centered_abs_mean": 0.155462646484375, "signal/accuracy_reward/group_bin_occupancy": 0.199609375, "signal/accuracy_reward/group_std_mean": 0.2071024954319, "signal/accuracy_reward/group_zero_std_frac": 0.403125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0777313232421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0777313232421875, "signal/advantage_abs_mean": 0.0987144574522972, "signal/advantage_pre_scale_abs_mean": 0.0987144574522972, "signal/advantage_pre_scale_std": 0.14342830181121827, "signal/advantage_std": 0.14342830181121827, "signal/brier_reward/centered_abs_mean": 0.17776857912540436, "signal/brier_reward/group_bin_occupancy": 0.860546875, "signal/brier_reward/group_std_mean": 0.2245795577764511, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013717547245323657, "signal/brier_reward/weight": 0.07716480493545533, "signal/brier_reward/weighted_centered_abs_mean": 0.013717547245323657, "signal/format_reward/centered_abs_mean": 0.00223388671875, "signal/format_reward/group_bin_occupancy": 0.12890625, "signal/format_reward/group_std_mean": 0.005897296266630292, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001116943359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001116943359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002920934371650219, "signal/frontier_aurc_reward/group_bin_occupancy": 0.75, "signal/frontier_aurc_reward/group_std_mean": 0.004523701500147581, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.691151989391074e-05, "signal/frontier_aurc_reward/weight": 0.01605743505060673, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.691151989391074e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22277138829231263, "signal/frontier_coverage_1/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_1/group_std_mean": 0.29276385307312014, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_1/weight": 0.015187039040029049, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_10/centered_abs_mean": 0.22277138829231263, "signal/frontier_coverage_10/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_10/group_std_mean": 0.29276385307312014, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_10/weight": 0.015187039040029049, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_15/centered_abs_mean": 0.22277138829231263, "signal/frontier_coverage_15/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_15/group_std_mean": 0.29276385307312014, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_15/weight": 0.015187039040029049, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_20/centered_abs_mean": 0.22277138829231263, "signal/frontier_coverage_20/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_20/group_std_mean": 0.29276385307312014, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_20/weight": 0.015187039040029049, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_25/centered_abs_mean": 0.22277138829231263, "signal/frontier_coverage_25/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_25/group_std_mean": 0.29276385307312014, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_25/weight": 0.015187039040029049, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_5/centered_abs_mean": 0.22277138829231263, "signal/frontier_coverage_5/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_5/group_std_mean": 0.29276385307312014, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_coverage_5/weight": 0.015187039040029049, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033829107880592344, "signal/frontier_ece_reward/centered_abs_mean": 0.06752243638038635, "signal/frontier_ece_reward/group_bin_occupancy": 0.763671875, "signal/frontier_ece_reward/group_std_mean": 0.08624927401542663, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0075724986381828785, "signal/frontier_ece_reward/weight": 0.11215667873620987, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0075724986381828785, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1393910378217697, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.584765625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.17833027690649034, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028399351984262466, "signal/frontier_entropy_batch_reward/weight": 0.20379884541034698, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028399351984262466, "step": 45 }, { "adaptive_ema/accuracy_reward": 0.27738092581450313, "adaptive_ema/brier_reward": 0.48569612856662836, "adaptive_ema/format_reward": 0.7715978268562566, "adaptive_ema/frontier_aurc_reward": 0.22100064226531516, "adaptive_ema/frontier_coverage_1": 0.2659692743136882, "adaptive_ema/frontier_coverage_10": 0.2659692743136882, "adaptive_ema/frontier_coverage_15": 0.2659692743136882, "adaptive_ema/frontier_coverage_20": 0.2659692743136882, "adaptive_ema/frontier_coverage_25": 0.2659692743136882, "adaptive_ema/frontier_coverage_5": 0.2659692743136882, "adaptive_ema/frontier_ece_reward": 0.22203780923501104, "adaptive_ema/frontier_entropy_batch_reward": -0.37876009305917974, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.07538380771875382, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.016350987181067467, "adaptive_weight/frontier_coverage_1": 0.015407079830765724, "adaptive_weight/frontier_coverage_10": 0.015407079830765724, "adaptive_weight/frontier_coverage_15": 0.015407079830765724, "adaptive_weight/frontier_coverage_20": 0.015407079830765724, "adaptive_weight/frontier_coverage_25": 0.015407079830765724, "adaptive_weight/frontier_coverage_5": 0.015407079830765724, "adaptive_weight/frontier_ece_reward": 0.11403079777956009, "adaptive_weight/frontier_entropy_batch_reward": 0.20209192335605622, "calibration/aurc": 0.38348908347214267, "calibration/batch_distribution_entropy": 0.9456007988080287, "calibration/batch_entropy_100bins": 0.6593447663249573, "calibration/batch_entropy_10bins": 0.9456007988080287, "calibration/batch_entropy_50bins": 0.7599760050408886, "calibration/batch_uniqueness": 0.8793887918154031, "calibration/buffer_distribution_entropy": 0.9236976372936251, "calibration/buffer_entropy_100bins": 0.6426565436461602, "calibration/buffer_entropy_10bins": 0.9236976372936251, "calibration/buffer_entropy_50bins": 0.7432587692609751, "calibration/confidence_entropy": 0.4798428379337942, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.022745098039215685, "calibration/coverage@20%": 0.07201281068934423, "calibration/coverage@25%": 0.10175239402267758, "calibration/coverage@30%": 0.23394594053374776, "calibration/coverage@5%": 0.0, "calibration/ece": 0.10627890594104014, "calibration/mean_confidence": 0.49457840209347503, "calibration/prompt_uniqueness": 0.7750884495317377, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1312.2, "completions/max_terminated_length": 465.2, "completions/mean_length": 159.719140625, "completions/mean_terminated_length": 159.0467041015625, "completions/min_length": 62.4, "completions/min_terminated_length": 62.4, "epoch": 0.16, "grad_norm": 0.0052760387770831585, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 167968355.0, "reward": 0.7870096206665039, "reward_std": 0.12264777570962906, "rewards/accuracy_reward": 0.451171875, "rewards/brier_reward": 0.7576644659042359, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.0038807093631476166, "rewards/frontier_coverage_1": 0.13388580977916717, "rewards/frontier_coverage_10": 0.13388580977916717, "rewards/frontier_coverage_15": 0.13388580977916717, "rewards/frontier_coverage_20": 0.13388580977916717, "rewards/frontier_coverage_25": 0.13388580977916717, "rewards/frontier_coverage_5": 0.13388580977916717, "rewards/frontier_ece_reward": 0.019111651740968228, "rewards/frontier_entropy_batch_reward": -0.04900626316666603, "signal/accuracy_reward/centered_abs_mean": 0.15225830078125, "signal/accuracy_reward/group_bin_occupancy": 0.19375, "signal/accuracy_reward/group_std_mean": 0.19692128896713257, "signal/accuracy_reward/group_zero_std_frac": 0.45, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.076129150390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.076129150390625, "signal/advantage_abs_mean": 0.09508385807275772, "signal/advantage_pre_scale_abs_mean": 0.09508385807275772, "signal/advantage_pre_scale_std": 0.14095271229743958, "signal/advantage_std": 0.14095271229743958, "signal/brier_reward/centered_abs_mean": 0.1855572283267975, "signal/brier_reward/group_bin_occupancy": 0.851953125, "signal/brier_reward/group_std_mean": 0.23422395586967468, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013987931795418263, "signal/brier_reward/weight": 0.07538380771875382, "signal/brier_reward/weighted_centered_abs_mean": 0.013987931795418263, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_bin_occupancy": 0.128515625, "signal/format_reward/group_std_mean": 0.005187963135540485, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003281982522457838, "signal/frontier_aurc_reward/group_bin_occupancy": 0.742578125, "signal/frontier_aurc_reward/group_std_mean": 0.004983571451157331, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3681767894886434e-05, "signal/frontier_aurc_reward/weight": 0.016350987181067467, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3681767894886434e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22950102984905243, "signal/frontier_coverage_1/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_1/group_std_mean": 0.2979970157146454, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_1/weight": 0.015407079830765724, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_10/centered_abs_mean": 0.22950102984905243, "signal/frontier_coverage_10/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_10/group_std_mean": 0.2979970157146454, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_10/weight": 0.015407079830765724, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_15/centered_abs_mean": 0.22950102984905243, "signal/frontier_coverage_15/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_15/group_std_mean": 0.2979970157146454, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_15/weight": 0.015407079830765724, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_20/centered_abs_mean": 0.22950102984905243, "signal/frontier_coverage_20/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_20/group_std_mean": 0.2979970157146454, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_20/weight": 0.015407079830765724, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_25/centered_abs_mean": 0.22950102984905243, "signal/frontier_coverage_25/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_25/group_std_mean": 0.2979970157146454, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_25/weight": 0.015407079830765724, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_5/centered_abs_mean": 0.22950102984905243, "signal/frontier_coverage_5/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_5/group_std_mean": 0.2979970157146454, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_coverage_5/weight": 0.015407079830765724, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035361848771572114, "signal/frontier_ece_reward/centered_abs_mean": 0.06420275643467903, "signal/frontier_ece_reward/group_bin_occupancy": 0.766015625, "signal/frontier_ece_reward/group_std_mean": 0.08285662680864334, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007319671288132668, "signal/frontier_ece_reward/weight": 0.11403079777956009, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007319671288132668, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08867169320583343, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.583984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.115115886926651, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01792623642832041, "signal/frontier_entropy_batch_reward/weight": 0.20209192335605622, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01792623642832041, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.5912737570825037, "eval_calibration/batch_distribution_entropy": 0.8838553804869622, "eval_calibration/batch_entropy_100bins": 0.5698017244122562, "eval_calibration/batch_entropy_10bins": 0.8838553804869622, "eval_calibration/batch_entropy_50bins": 0.6635008310587025, "eval_calibration/batch_uniqueness": 0.8337689418574401, "eval_calibration/buffer_distribution_entropy": 0.9266873253076938, "eval_calibration/buffer_entropy_100bins": 0.6462848756942279, "eval_calibration/buffer_entropy_10bins": 0.9266873253076938, "eval_calibration/buffer_entropy_50bins": 0.746800332589743, "eval_calibration/confidence_entropy": 0.46311622799423224, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.0, "eval_calibration/coverage@25%": 0.0, "eval_calibration/coverage@30%": 0.0, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.24023437500000003, "eval_calibration/mean_confidence": 0.4886668346774193, "eval_calibration/prompt_uniqueness": 0.8337689418574401, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 661.75, "eval_completions/max_terminated_length": 361.5, "eval_completions/mean_length": 165.89580917358398, "eval_completions/mean_terminated_length": 163.20844650268555, "eval_completions/min_length": 80.0, "eval_completions/min_terminated_length": 80.0, "eval_loss": 0.0, "eval_num_tokens": 167968355.0, "eval_reward": 0.704633966088295, "eval_reward_std": 0.23834892362356186, "eval_rewards/accuracy_reward": 0.36328125, "eval_rewards/brier_reward": 0.741399809718132, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.004931134288199246, "eval_rewards/frontier_coverage_1": 0.18837910518050194, "eval_rewards/frontier_coverage_10": 0.18837910518050194, "eval_rewards/frontier_coverage_15": 0.18837910518050194, "eval_rewards/frontier_coverage_20": 0.18837910518050194, "eval_rewards/frontier_coverage_25": 0.18837910518050194, "eval_rewards/frontier_coverage_5": 0.18837910518050194, "eval_rewards/frontier_ece_reward": 0.005667033372446895, "eval_rewards/frontier_entropy_batch_reward": -0.24560075998306274, "eval_runtime": 28.0344, "eval_samples_per_second": 17.835, "eval_signal/accuracy_reward/centered_abs_mean": 0.44580078125, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4784899652004242, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.222900390625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.222900390625, "eval_signal/advantage_abs_mean": 0.20399565622210503, "eval_signal/advantage_pre_scale_abs_mean": 0.20399565622210503, "eval_signal/advantage_pre_scale_std": 0.23601685464382172, "eval_signal/advantage_std": 0.23601685464382172, "eval_signal/brier_reward/centered_abs_mean": 0.23530442267656326, "eval_signal/brier_reward/group_bin_occupancy": 0.9375, "eval_signal/brier_reward/group_std_mean": 0.28814616054296494, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01757534919306636, "eval_signal/brier_reward/weight": 0.07469195872545242, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01757534919306636, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_bin_occupancy": 0.1328125, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0044213252840563655, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7890625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006909370771609247, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.282986189238727e-05, "eval_signal/frontier_aurc_reward/weight": 0.016472404822707176, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.282986189238727e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.34682943671941757, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_1/group_std_mean": 0.44357454776763916, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_1/weight": 0.015480867587029934, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.34682943671941757, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_10/group_std_mean": 0.44357454776763916, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_10/weight": 0.015480867587029934, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.34682943671941757, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_15/group_std_mean": 0.44357454776763916, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_15/weight": 0.015480867587029934, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.34682943671941757, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_20/group_std_mean": 0.44357454776763916, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_20/weight": 0.015480867587029934, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.34682943671941757, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.44357454776763916, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_25/weight": 0.015480867587029934, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.34682943671941757, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_5/group_std_mean": 0.44357454776763916, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_coverage_5/weight": 0.015480867587029934, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0053692207438871264, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.07570397295057774, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9375, "eval_signal/frontier_ece_reward/group_std_mean": 0.10851926729083061, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008691624971106648, "eval_signal/frontier_ece_reward/weight": 0.11481068283319473, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008691624971106648, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3352913558483124, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.53125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.39990557730197906, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.06754100508987904, "eval_signal/frontier_entropy_batch_reward/weight": 0.2014397382736206, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.06754100508987904, "eval_steps_per_second": 0.143, "step": 50 }, { "adaptive_ema/accuracy_reward": 0.2851488862848163, "adaptive_ema/brier_reward": 0.49900272362611686, "adaptive_ema/format_reward": 0.7827584087449388, "adaptive_ema/frontier_aurc_reward": 0.2099823909624985, "adaptive_ema/frontier_coverage_1": 0.2605413524021998, "adaptive_ema/frontier_coverage_10": 0.2605413524021998, "adaptive_ema/frontier_coverage_15": 0.2605413524021998, "adaptive_ema/frontier_coverage_20": 0.2605413524021998, "adaptive_ema/frontier_coverage_25": 0.2605413524021998, "adaptive_ema/frontier_coverage_5": 0.2605413524021998, "adaptive_ema/frontier_ece_reward": 0.21204188899333073, "adaptive_ema/frontier_entropy_batch_reward": -0.36382088058690953, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.07369283884763718, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01664076782763004, "adaptive_weight/frontier_coverage_1": 0.015575786866247655, "adaptive_weight/frontier_coverage_10": 0.015575786866247655, "adaptive_weight/frontier_coverage_15": 0.015575786866247655, "adaptive_weight/frontier_coverage_20": 0.015575786866247655, "adaptive_weight/frontier_coverage_25": 0.015575786866247655, "adaptive_weight/frontier_coverage_5": 0.015575786866247655, "adaptive_weight/frontier_ece_reward": 0.11590351611375808, "adaptive_weight/frontier_entropy_batch_reward": 0.20060815215110778, "calibration/aurc": 0.42473116107063263, "calibration/batch_distribution_entropy": 0.9512775063954914, "calibration/batch_entropy_100bins": 0.664655780835464, "calibration/batch_entropy_10bins": 0.9512775063954914, "calibration/batch_entropy_50bins": 0.7677863296635008, "calibration/batch_uniqueness": 0.8851788600876166, "calibration/buffer_distribution_entropy": 0.9283644128738313, "calibration/buffer_entropy_100bins": 0.6481654925489638, "calibration/buffer_entropy_10bins": 0.9283644128738313, "calibration/buffer_entropy_50bins": 0.7487002117614024, "calibration/confidence_entropy": 0.45550758726326446, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.02815563725490196, "calibration/coverage@25%": 0.03518688725490196, "calibration/coverage@30%": 0.14534313725490194, "calibration/coverage@5%": 0.0, "calibration/ece": 0.18896995251225493, "calibration/mean_confidence": 0.5027503140318628, "calibration/prompt_uniqueness": 0.7720712270746619, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 899.4, "completions/max_terminated_length": 475.6, "completions/mean_length": 166.46630859375, "completions/mean_terminated_length": 166.06537475585938, "completions/min_length": 62.8, "completions/min_terminated_length": 62.8, "epoch": 0.176, "grad_norm": 0.016879109665751457, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 184910090.0, "reward": 0.7733013749122619, "reward_std": 0.11779149472713471, "rewards/accuracy_reward": 0.43916015625, "rewards/brier_reward": 0.7471538543701172, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.003878576587885618, "rewards/frontier_coverage_1": 0.14781001955270767, "rewards/frontier_coverage_10": 0.14781001955270767, "rewards/frontier_coverage_15": 0.14781001955270767, "rewards/frontier_coverage_20": 0.14781001955270767, "rewards/frontier_coverage_25": 0.14781001955270767, "rewards/frontier_coverage_5": 0.14781001955270767, "rewards/frontier_ece_reward": 0.01585045214742422, "rewards/frontier_entropy_batch_reward": -0.0840143196284771, "signal/accuracy_reward/centered_abs_mean": 0.150177001953125, "signal/accuracy_reward/group_bin_occupancy": 0.192578125, "signal/accuracy_reward/group_std_mean": 0.194138702750206, "signal/accuracy_reward/group_zero_std_frac": 0.459375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0750885009765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0750885009765625, "signal/advantage_abs_mean": 0.09202968776226043, "signal/advantage_pre_scale_abs_mean": 0.09202968776226043, "signal/advantage_pre_scale_std": 0.1346214473247528, "signal/advantage_std": 0.1346214473247528, "signal/brier_reward/centered_abs_mean": 0.19577476978302003, "signal/brier_reward/group_bin_occupancy": 0.840625, "signal/brier_reward/group_std_mean": 0.2449037402868271, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014425278082489968, "signal/brier_reward/weight": 0.07369283884763718, "signal/brier_reward/weighted_centered_abs_mean": 0.014425278082489968, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033737838268280028, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72578125, "signal/frontier_aurc_reward/group_std_mean": 0.005213375855237246, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.614309338852763e-05, "signal/frontier_aurc_reward/weight": 0.01664076782763004, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.614309338852763e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2555152654647827, "signal/frontier_coverage_1/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_1/group_std_mean": 0.32543211579322817, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_1/weight": 0.015575786866247655, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_10/centered_abs_mean": 0.2555152654647827, "signal/frontier_coverage_10/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_10/group_std_mean": 0.32543211579322817, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_10/weight": 0.015575786866247655, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_15/centered_abs_mean": 0.2555152654647827, "signal/frontier_coverage_15/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_15/group_std_mean": 0.32543211579322817, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_15/weight": 0.015575786866247655, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_20/centered_abs_mean": 0.2555152654647827, "signal/frontier_coverage_20/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_20/group_std_mean": 0.32543211579322817, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_20/weight": 0.015575786866247655, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_25/centered_abs_mean": 0.2555152654647827, "signal/frontier_coverage_25/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_25/group_std_mean": 0.32543211579322817, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_25/weight": 0.015575786866247655, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_5/centered_abs_mean": 0.2555152654647827, "signal/frontier_coverage_5/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_5/group_std_mean": 0.32543211579322817, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_coverage_5/weight": 0.015575786866247655, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003980266209691763, "signal/frontier_ece_reward/centered_abs_mean": 0.056504715234041214, "signal/frontier_ece_reward/group_bin_occupancy": 0.734765625, "signal/frontier_ece_reward/group_std_mean": 0.07384437769651413, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006548455357551575, "signal/frontier_ece_reward/weight": 0.11590351611375808, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006548455357551575, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13305359482765197, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.537890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.16954652667045594, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026704121381044388, "signal/frontier_entropy_batch_reward/weight": 0.20060815215110778, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026704121381044388, "step": 55 }, { "adaptive_ema/accuracy_reward": 0.2937015969328449, "adaptive_ema/brier_reward": 0.5114010685111742, "adaptive_ema/format_reward": 0.7933795053837518, "adaptive_ema/frontier_aurc_reward": 0.19951788453686373, "adaptive_ema/frontier_coverage_1": 0.2544931446820287, "adaptive_ema/frontier_coverage_10": 0.2544931446820287, "adaptive_ema/frontier_coverage_15": 0.2544931446820287, "adaptive_ema/frontier_coverage_20": 0.2544931446820287, "adaptive_ema/frontier_coverage_25": 0.2544931446820287, "adaptive_ema/frontier_coverage_5": 0.2544931446820287, "adaptive_ema/frontier_ece_reward": 0.2025474157793294, "adaptive_ema/frontier_entropy_batch_reward": -0.3494971511591881, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.07209239751100541, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.016913557425141335, "adaptive_weight/frontier_coverage_1": 0.01575196459889412, "adaptive_weight/frontier_coverage_10": 0.01575196459889412, "adaptive_weight/frontier_coverage_15": 0.01575196459889412, "adaptive_weight/frontier_coverage_20": 0.01575196459889412, "adaptive_weight/frontier_coverage_25": 0.01575196459889412, "adaptive_weight/frontier_coverage_5": 0.01575196459889412, "adaptive_weight/frontier_ece_reward": 0.1176643967628479, "adaptive_weight/frontier_entropy_batch_reward": 0.19911785721778869, "calibration/aurc": 0.3647552716722237, "calibration/batch_distribution_entropy": 0.9517000612178709, "calibration/batch_entropy_100bins": 0.6801240206424846, "calibration/batch_entropy_10bins": 0.9517000612178709, "calibration/batch_entropy_50bins": 0.7834312829799275, "calibration/batch_uniqueness": 0.889671378153011, "calibration/buffer_distribution_entropy": 0.9329493751260302, "calibration/buffer_entropy_100bins": 0.6537943828426254, "calibration/buffer_entropy_10bins": 0.9329493751260302, "calibration/buffer_entropy_50bins": 0.7544689893599347, "calibration/confidence_entropy": 0.46903725087093895, "calibration/coverage@0%": 0.0050804182974559685, "calibration/coverage@1%": 0.0050804182974559685, "calibration/coverage@10%": 0.025002293297455968, "calibration/coverage@15%": 0.03359604329745597, "calibration/coverage@20%": 0.07428219789628179, "calibration/coverage@25%": 0.1606837084148728, "calibration/coverage@30%": 0.24316368028375734, "calibration/coverage@5%": 0.0050804182974559685, "calibration/ece": 0.12325721157197897, "calibration/mean_confidence": 0.46352088132185665, "calibration/prompt_uniqueness": 0.7723385369244277, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 781.4, "completions/max_terminated_length": 574.4, "completions/mean_length": 174.29404296875, "completions/mean_terminated_length": 174.160693359375, "completions/min_length": 60.2, "completions/min_terminated_length": 60.2, "epoch": 0.192, "grad_norm": 0.0022992538288235664, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 201509677.0, "reward": 0.7893312335014343, "reward_std": 0.10895285159349441, "rewards/accuracy_reward": 0.4681640625, "rewards/brier_reward": 0.7553203701972961, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0033560583367943765, "rewards/frontier_coverage_1": 0.13129240423440933, "rewards/frontier_coverage_10": 0.13129240423440933, "rewards/frontier_coverage_15": 0.13129240423440933, "rewards/frontier_coverage_20": 0.13129240423440933, "rewards/frontier_coverage_25": 0.13129240423440933, "rewards/frontier_coverage_5": 0.13129240423440933, "rewards/frontier_ece_reward": 0.019136627763509752, "rewards/frontier_entropy_batch_reward": -0.06853316724300385, "signal/accuracy_reward/centered_abs_mean": 0.1386474609375, "signal/accuracy_reward/group_bin_occupancy": 0.188671875, "signal/accuracy_reward/group_std_mean": 0.1808041363954544, "signal/accuracy_reward/group_zero_std_frac": 0.490625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06932373046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06932373046875, "signal/advantage_abs_mean": 0.08480708599090576, "signal/advantage_pre_scale_abs_mean": 0.08480708599090576, "signal/advantage_pre_scale_std": 0.1272047370672226, "signal/advantage_std": 0.1272047370672226, "signal/brier_reward/centered_abs_mean": 0.18749885261058807, "signal/brier_reward/group_bin_occupancy": 0.840234375, "signal/brier_reward/group_std_mean": 0.23523322641849517, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01351576652377844, "signal/brier_reward/weight": 0.07209239751100541, "signal/brier_reward/weighted_centered_abs_mean": 0.01351576652377844, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002764544356614351, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72265625, "signal/frontier_aurc_reward/group_std_mean": 0.004374626139178872, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.676385360653512e-05, "signal/frontier_aurc_reward/weight": 0.016913557425141335, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.676385360653512e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2529231429100037, "signal/frontier_coverage_1/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_1/group_std_mean": 0.3189453959465027, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_1/weight": 0.01575196459889412, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_10/centered_abs_mean": 0.2529231429100037, "signal/frontier_coverage_10/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_10/group_std_mean": 0.3189453959465027, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_10/weight": 0.01575196459889412, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_15/centered_abs_mean": 0.2529231429100037, "signal/frontier_coverage_15/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_15/group_std_mean": 0.3189453959465027, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_15/weight": 0.01575196459889412, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_20/centered_abs_mean": 0.2529231429100037, "signal/frontier_coverage_20/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_20/group_std_mean": 0.3189453959465027, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_20/weight": 0.01575196459889412, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_25/centered_abs_mean": 0.2529231429100037, "signal/frontier_coverage_25/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_25/group_std_mean": 0.3189453959465027, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_25/weight": 0.01575196459889412, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_5/centered_abs_mean": 0.2529231429100037, "signal/frontier_coverage_5/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_5/group_std_mean": 0.3189453959465027, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_coverage_5/weight": 0.01575196459889412, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003983773337677121, "signal/frontier_ece_reward/centered_abs_mean": 0.04883822426199913, "signal/frontier_ece_reward/group_bin_occupancy": 0.71328125, "signal/frontier_ece_reward/group_std_mean": 0.0641761988401413, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005746498424559832, "signal/frontier_ece_reward/weight": 0.1176643967628479, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005746498424559832, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11229871660470962, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.58046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.14136107563972472, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022366367653012274, "signal/frontier_entropy_batch_reward/weight": 0.19911785721778869, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022366367653012274, "step": 60 }, { "adaptive_ema/accuracy_reward": 0.3038428838680613, "adaptive_ema/brier_reward": 0.5228995794258062, "adaptive_ema/format_reward": 0.8034687394880666, "adaptive_ema/frontier_aurc_reward": 0.18958452402128695, "adaptive_ema/frontier_coverage_1": 0.24643764275075056, "adaptive_ema/frontier_coverage_10": 0.24643764275075056, "adaptive_ema/frontier_coverage_15": 0.24643764275075056, "adaptive_ema/frontier_coverage_20": 0.24643764275075056, "adaptive_ema/frontier_coverage_25": 0.24643764275075056, "adaptive_ema/frontier_coverage_5": 0.24643764275075056, "adaptive_ema/frontier_ece_reward": 0.1935301358220703, "adaptive_ema/frontier_entropy_batch_reward": -0.334805943644907, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.07057955414056778, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01716814637184143, "adaptive_weight/frontier_coverage_1": 0.015963743999600412, "adaptive_weight/frontier_coverage_10": 0.015963743999600412, "adaptive_weight/frontier_coverage_15": 0.015963743999600412, "adaptive_weight/frontier_coverage_20": 0.015963743999600412, "adaptive_weight/frontier_coverage_25": 0.015963743999600412, "adaptive_weight/frontier_coverage_5": 0.015963743999600412, "adaptive_weight/frontier_ece_reward": 0.1193055659532547, "adaptive_weight/frontier_entropy_batch_reward": 0.19746426343917847, "calibration/aurc": 0.3230782286663958, "calibration/batch_distribution_entropy": 0.9548661124099714, "calibration/batch_entropy_100bins": 0.6952415427511257, "calibration/batch_entropy_10bins": 0.9548661124099714, "calibration/batch_entropy_50bins": 0.7961299048491316, "calibration/batch_uniqueness": 0.895257568359375, "calibration/buffer_distribution_entropy": 0.9375800889991058, "calibration/buffer_entropy_100bins": 0.6608783801545776, "calibration/buffer_entropy_10bins": 0.9375800889991058, "calibration/buffer_entropy_50bins": 0.761598284883173, "calibration/confidence_entropy": 0.47213845584893355, "calibration/coverage@0%": 0.008203125, "calibration/coverage@1%": 0.008203125, "calibration/coverage@10%": 0.057421875, "calibration/coverage@15%": 0.143359375, "calibration/coverage@20%": 0.21484375, "calibration/coverage@25%": 0.363671875, "calibration/coverage@30%": 0.51640625, "calibration/coverage@5%": 0.022265625, "calibration/ece": 0.17461655873487095, "calibration/mean_confidence": 0.4572429964962189, "calibration/prompt_uniqueness": 0.79521484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 939.4, "completions/max_terminated_length": 589.4, "completions/mean_length": 185.18642578125, "completions/mean_terminated_length": 184.7908447265625, "completions/min_length": 69.4, "completions/min_terminated_length": 69.4, "epoch": 0.208, "grad_norm": 0.001206784276291728, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 218438210.0, "reward": 0.8118098497390747, "reward_std": 0.1081416666507721, "rewards/accuracy_reward": 0.51357421875, "rewards/brier_reward": 0.7503050684928894, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0029526965226978064, "rewards/frontier_coverage_1": 0.080244729667902, "rewards/frontier_coverage_10": 0.080244729667902, "rewards/frontier_coverage_15": 0.080244729667902, "rewards/frontier_coverage_20": 0.080244729667902, "rewards/frontier_coverage_25": 0.080244729667902, "rewards/frontier_coverage_5": 0.080244729667902, "rewards/frontier_ece_reward": 0.02022084631025791, "rewards/frontier_entropy_batch_reward": -0.039874791353940967, "signal/accuracy_reward/centered_abs_mean": 0.141192626953125, "signal/accuracy_reward/group_bin_occupancy": 0.194140625, "signal/accuracy_reward/group_std_mean": 0.18957480490207673, "signal/accuracy_reward/group_zero_std_frac": 0.446875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0705963134765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0705963134765625, "signal/advantage_abs_mean": 0.08337005525827408, "signal/advantage_pre_scale_abs_mean": 0.08337005525827408, "signal/advantage_pre_scale_std": 0.12559250891208648, "signal/advantage_std": 0.12559250891208648, "signal/brier_reward/centered_abs_mean": 0.18458410501480102, "signal/brier_reward/group_bin_occupancy": 0.85859375, "signal/brier_reward/group_std_mean": 0.23162301778793334, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01303065400570631, "signal/brier_reward/weight": 0.07057955414056778, "signal/brier_reward/weighted_centered_abs_mean": 0.01303065400570631, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002375226141884923, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7328125, "signal/frontier_aurc_reward/group_std_mean": 0.0037417122628539802, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.078082347405143e-05, "signal/frontier_aurc_reward/weight": 0.01716814637184143, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.078082347405143e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.25239089131355286, "signal/frontier_coverage_1/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_1/group_std_mean": 0.32096874713897705, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_1/weight": 0.015963743999600412, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_10/centered_abs_mean": 0.25239089131355286, "signal/frontier_coverage_10/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_10/group_std_mean": 0.32096874713897705, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_10/weight": 0.015963743999600412, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_15/centered_abs_mean": 0.25239089131355286, "signal/frontier_coverage_15/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_15/group_std_mean": 0.32096874713897705, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_15/weight": 0.015963743999600412, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_20/centered_abs_mean": 0.25239089131355286, "signal/frontier_coverage_20/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_20/group_std_mean": 0.32096874713897705, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_20/weight": 0.015963743999600412, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_25/centered_abs_mean": 0.25239089131355286, "signal/frontier_coverage_25/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_25/group_std_mean": 0.32096874713897705, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_25/weight": 0.015963743999600412, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_5/centered_abs_mean": 0.25239089131355286, "signal/frontier_coverage_5/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_5/group_std_mean": 0.32096874713897705, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_coverage_5/weight": 0.015963743999600412, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004028905322775245, "signal/frontier_ece_reward/centered_abs_mean": 0.04425336569547653, "signal/frontier_ece_reward/group_bin_occupancy": 0.716796875, "signal/frontier_ece_reward/group_std_mean": 0.05906342342495918, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005278925970196724, "signal/frontier_ece_reward/weight": 0.1193055659532547, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005278925970196724, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07931657396256923, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.540625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10367400497198105, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01566738625988364, "signal/frontier_entropy_batch_reward/weight": 0.19746426343917847, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01566738625988364, "step": 65 }, { "adaptive_ema/accuracy_reward": 0.31235736482134985, "adaptive_ema/brier_reward": 0.5346501426524177, "adaptive_ema/format_reward": 0.8130536589228408, "adaptive_ema/frontier_aurc_reward": 0.18014086612295538, "adaptive_ema/frontier_coverage_1": 0.2403427264163771, "adaptive_ema/frontier_coverage_10": 0.2403427264163771, "adaptive_ema/frontier_coverage_15": 0.2403427264163771, "adaptive_ema/frontier_coverage_20": 0.2403427264163771, "adaptive_ema/frontier_coverage_25": 0.2403427264163771, "adaptive_ema/frontier_coverage_5": 0.2403427264163771, "adaptive_ema/frontier_ece_reward": 0.18498148000477496, "adaptive_ema/frontier_entropy_batch_reward": -0.32019707006610054, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.06907036155462265, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.017426037043333054, "adaptive_weight/frontier_coverage_1": 0.016146432608366013, "adaptive_weight/frontier_coverage_10": 0.016146432608366013, "adaptive_weight/frontier_coverage_15": 0.016146432608366013, "adaptive_weight/frontier_coverage_20": 0.016146432608366013, "adaptive_weight/frontier_coverage_25": 0.016146432608366013, "adaptive_weight/frontier_coverage_5": 0.016146432608366013, "adaptive_weight/frontier_ece_reward": 0.12097169011831284, "adaptive_weight/frontier_entropy_batch_reward": 0.19595331251621245, "calibration/aurc": 0.3370663750465913, "calibration/batch_distribution_entropy": 0.9550954799649769, "calibration/batch_entropy_100bins": 0.7028098750078203, "calibration/batch_entropy_10bins": 0.9550954799649769, "calibration/batch_entropy_50bins": 0.8028701619900716, "calibration/batch_uniqueness": 0.8958972456600977, "calibration/buffer_distribution_entropy": 0.9420689064142062, "calibration/buffer_entropy_100bins": 0.6680678396022661, "calibration/buffer_entropy_10bins": 0.9420689064142062, "calibration/buffer_entropy_50bins": 0.7688077976399332, "calibration/confidence_entropy": 0.46388023878603474, "calibration/coverage@0%": 0.003125, "calibration/coverage@1%": 0.003125, "calibration/coverage@10%": 0.03712469362745098, "calibration/coverage@15%": 0.12622395833333333, "calibration/coverage@20%": 0.2442907475490196, "calibration/coverage@25%": 0.32597273284313727, "calibration/coverage@30%": 0.4839981617647059, "calibration/coverage@5%": 0.01015625, "calibration/ece": 0.19436231464460785, "calibration/mean_confidence": 0.4149125628063725, "calibration/prompt_uniqueness": 0.785660688735692, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 1340.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 189.396484375, "completions/mean_terminated_length": 188.08037414550782, "completions/min_length": 64.2, "completions/min_terminated_length": 64.2, "epoch": 0.224, "grad_norm": 0.001395778963342309, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 235530814.0, "reward": 0.7931761741638184, "reward_std": 0.10108533054590225, "rewards/accuracy_reward": 0.46767578125, "rewards/brier_reward": 0.7646504402160644, "rewards/format_reward": 0.9986328125, "rewards/frontier_aurc_reward": -0.003211074694991112, "rewards/frontier_coverage_1": 0.1359811007976532, "rewards/frontier_coverage_10": 0.1359811007976532, "rewards/frontier_coverage_15": 0.1359811007976532, "rewards/frontier_coverage_20": 0.1359811007976532, "rewards/frontier_coverage_25": 0.1359811007976532, "rewards/frontier_coverage_5": 0.1359811007976532, "rewards/frontier_ece_reward": 0.017966778576374055, "rewards/frontier_entropy_batch_reward": -0.04219883792102337, "signal/accuracy_reward/centered_abs_mean": 0.128875732421875, "signal/accuracy_reward/group_bin_occupancy": 0.1859375, "signal/accuracy_reward/group_std_mean": 0.1700698047876358, "signal/accuracy_reward/group_zero_std_frac": 0.5125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0644378662109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0644378662109375, "signal/advantage_abs_mean": 0.07703877985477448, "signal/advantage_pre_scale_abs_mean": 0.07703877985477448, "signal/advantage_pre_scale_std": 0.12077159732580185, "signal/advantage_std": 0.12077159732580185, "signal/brier_reward/centered_abs_mean": 0.1796649605035782, "signal/brier_reward/group_bin_occupancy": 0.8453125, "signal/brier_reward/group_std_mean": 0.22606565058231354, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012408938072621823, "signal/brier_reward/weight": 0.06907036155462265, "signal/brier_reward/weighted_centered_abs_mean": 0.012408938072621823, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_bin_occupancy": 0.13046875, "signal/format_reward/group_std_mean": 0.0077339803334325555, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024040113668888806, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72734375, "signal/frontier_aurc_reward/group_std_mean": 0.003842458548024297, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.189369719824754e-05, "signal/frontier_aurc_reward/weight": 0.017426037043333054, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.189369719824754e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.24840882122516633, "signal/frontier_coverage_1/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_1/group_std_mean": 0.3130028069019318, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_1/weight": 0.016146432608366013, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_10/centered_abs_mean": 0.24840882122516633, "signal/frontier_coverage_10/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_10/group_std_mean": 0.3130028069019318, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_10/weight": 0.016146432608366013, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_15/centered_abs_mean": 0.24840882122516633, "signal/frontier_coverage_15/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_15/group_std_mean": 0.3130028069019318, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_15/weight": 0.016146432608366013, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_20/centered_abs_mean": 0.24840882122516633, "signal/frontier_coverage_20/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_20/group_std_mean": 0.3130028069019318, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_20/weight": 0.016146432608366013, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_25/centered_abs_mean": 0.24840882122516633, "signal/frontier_coverage_25/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_25/group_std_mean": 0.3130028069019318, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_25/weight": 0.016146432608366013, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_5/centered_abs_mean": 0.24840882122516633, "signal/frontier_coverage_5/group_bin_occupancy": 0.893359375, "signal/frontier_coverage_5/group_std_mean": 0.3130028069019318, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_coverage_5/weight": 0.016146432608366013, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004010894149541855, "signal/frontier_ece_reward/centered_abs_mean": 0.04076602905988693, "signal/frontier_ece_reward/group_bin_occupancy": 0.70234375, "signal/frontier_ece_reward/group_std_mean": 0.054039982706308366, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004931167047470808, "signal/frontier_ece_reward/weight": 0.12097169011831284, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004931167047470808, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07719949334859848, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.580078125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09887575209140778, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015121790021657944, "signal/frontier_entropy_batch_reward/weight": 0.19595331251621245, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015121790021657944, "step": 70 }, { "adaptive_ema/accuracy_reward": 0.3219364410288985, "adaptive_ema/brier_reward": 0.5454268242624374, "adaptive_ema/format_reward": 0.8221584566218649, "adaptive_ema/frontier_aurc_reward": 0.17115832332112776, "adaptive_ema/frontier_coverage_1": 0.23319734935747088, "adaptive_ema/frontier_coverage_10": 0.23319734935747088, "adaptive_ema/frontier_coverage_15": 0.23319734935747088, "adaptive_ema/frontier_coverage_20": 0.23319734935747088, "adaptive_ema/frontier_coverage_25": 0.23319734935747088, "adaptive_ema/frontier_coverage_5": 0.23319734935747088, "adaptive_ema/frontier_ece_reward": 0.17682809336381372, "adaptive_ema/frontier_entropy_batch_reward": -0.30664606653046406, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.06764657944440841, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.017662768438458442, "adaptive_weight/frontier_coverage_1": 0.0163407064974308, "adaptive_weight/frontier_coverage_10": 0.0163407064974308, "adaptive_weight/frontier_coverage_15": 0.0163407064974308, "adaptive_weight/frontier_coverage_20": 0.0163407064974308, "adaptive_weight/frontier_coverage_25": 0.0163407064974308, "adaptive_weight/frontier_coverage_5": 0.0163407064974308, "adaptive_weight/frontier_ece_reward": 0.1224995955824852, "adaptive_weight/frontier_entropy_batch_reward": 0.1944468140602112, "calibration/aurc": 0.3872978113781852, "calibration/batch_distribution_entropy": 0.9483224085405197, "calibration/batch_entropy_100bins": 0.6823282298201587, "calibration/batch_entropy_10bins": 0.9483224085405197, "calibration/batch_entropy_50bins": 0.7821902097105213, "calibration/batch_uniqueness": 0.8889759547000595, "calibration/buffer_distribution_entropy": 0.9458731353518836, "calibration/buffer_entropy_100bins": 0.6739486859505013, "calibration/buffer_entropy_10bins": 0.9458731353518836, "calibration/buffer_entropy_50bins": 0.7745501236562791, "calibration/confidence_entropy": 0.4855388941567712, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.07734375, "calibration/coverage@15%": 0.125, "calibration/coverage@20%": 0.190625, "calibration/coverage@25%": 0.2046875, "calibration/coverage@30%": 0.29466911764705883, "calibration/coverage@5%": 0.0, "calibration/ece": 0.17449257863567286, "calibration/mean_confidence": 0.4862572071414702, "calibration/prompt_uniqueness": 0.7823488614561654, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 964.0, "completions/max_terminated_length": 572.0, "completions/mean_length": 194.5607421875, "completions/mean_terminated_length": 194.03685302734374, "completions/min_length": 80.2, "completions/min_terminated_length": 80.2, "epoch": 0.24, "grad_norm": 0.0011437971843406558, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 252774796.0, "reward": 0.8140671372413635, "reward_std": 0.110272516310215, "rewards/accuracy_reward": 0.52626953125, "rewards/brier_reward": 0.7511320590972901, "rewards/format_reward": 0.99892578125, "rewards/frontier_aurc_reward": -0.0030971964821219443, "rewards/frontier_coverage_1": 0.07542620496824384, "rewards/frontier_coverage_10": 0.07542620496824384, "rewards/frontier_coverage_15": 0.07542620496824384, "rewards/frontier_coverage_20": 0.07542620496824384, "rewards/frontier_coverage_25": 0.07542620496824384, "rewards/frontier_coverage_5": 0.07542620496824384, "rewards/frontier_ece_reward": 0.019243543781340123, "rewards/frontier_entropy_batch_reward": -0.04741813093423843, "signal/accuracy_reward/centered_abs_mean": 0.147760009765625, "signal/accuracy_reward/group_bin_occupancy": 0.194140625, "signal/accuracy_reward/group_std_mean": 0.19448045492172242, "signal/accuracy_reward/group_zero_std_frac": 0.446875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0738800048828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0738800048828125, "signal/advantage_abs_mean": 0.08491412997245788, "signal/advantage_pre_scale_abs_mean": 0.08491412997245788, "signal/advantage_pre_scale_std": 0.13074929565191268, "signal/advantage_std": 0.13074929565191268, "signal/brier_reward/centered_abs_mean": 0.18371776044368743, "signal/brier_reward/group_bin_occupancy": 0.861328125, "signal/brier_reward/group_std_mean": 0.2315950334072113, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0124272545799613, "signal/brier_reward/weight": 0.06764657944440841, "signal/brier_reward/weighted_centered_abs_mean": 0.0124272545799613, "signal/format_reward/centered_abs_mean": 0.002081298828125, "signal/format_reward/group_bin_occupancy": 0.129296875, "signal/format_reward/group_std_mean": 0.006076698703691363, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002888940554112196, "signal/frontier_aurc_reward/group_bin_occupancy": 0.732421875, "signal/frontier_aurc_reward/group_std_mean": 0.004471804574131965, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.103433359181509e-05, "signal/frontier_aurc_reward/weight": 0.017662768438458442, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.103433359181509e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2310771405696869, "signal/frontier_coverage_1/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_1/group_std_mean": 0.29875036478042605, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_1/weight": 0.0163407064974308, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_10/centered_abs_mean": 0.2310771405696869, "signal/frontier_coverage_10/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_10/group_std_mean": 0.29875036478042605, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_10/weight": 0.0163407064974308, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_15/centered_abs_mean": 0.2310771405696869, "signal/frontier_coverage_15/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_15/group_std_mean": 0.29875036478042605, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_15/weight": 0.0163407064974308, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_20/centered_abs_mean": 0.2310771405696869, "signal/frontier_coverage_20/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_20/group_std_mean": 0.29875036478042605, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_20/weight": 0.0163407064974308, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_25/centered_abs_mean": 0.2310771405696869, "signal/frontier_coverage_25/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_25/group_std_mean": 0.29875036478042605, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_25/weight": 0.0163407064974308, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_5/centered_abs_mean": 0.2310771405696869, "signal/frontier_coverage_5/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_5/group_std_mean": 0.29875036478042605, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_coverage_5/weight": 0.0163407064974308, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037761200219392776, "signal/frontier_ece_reward/centered_abs_mean": 0.04328928515315056, "signal/frontier_ece_reward/group_bin_occupancy": 0.728125, "signal/frontier_ece_reward/group_std_mean": 0.0567799873650074, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005302841123193502, "signal/frontier_ece_reward/weight": 0.1224995955824852, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005302841123193502, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07928718775510787, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.621875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10165912210941315, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015409917011857033, "signal/frontier_entropy_batch_reward/weight": 0.1944468140602112, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015409917011857033, "step": 75 }, { "adaptive_ema/accuracy_reward": 0.3313964150995795, "adaptive_ema/brier_reward": 0.5559265972898461, "adaptive_ema/format_reward": 0.830832507451503, "adaptive_ema/frontier_aurc_reward": 0.16262213049660174, "adaptive_ema/frontier_coverage_1": 0.2263081342605311, "adaptive_ema/frontier_coverage_10": 0.2263081342605311, "adaptive_ema/frontier_coverage_15": 0.2263081342605311, "adaptive_ema/frontier_coverage_20": 0.2263081342605311, "adaptive_ema/frontier_coverage_25": 0.2263081342605311, "adaptive_ema/frontier_coverage_5": 0.2263081342605311, "adaptive_ema/frontier_ece_reward": 0.16915048062563032, "adaptive_ema/frontier_entropy_batch_reward": -0.2940994524556769, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.06624611765146256, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01788846291601658, "adaptive_weight/frontier_coverage_1": 0.016527966037392615, "adaptive_weight/frontier_coverage_10": 0.016527966037392615, "adaptive_weight/frontier_coverage_15": 0.016527966037392615, "adaptive_weight/frontier_coverage_20": 0.016527966037392615, "adaptive_weight/frontier_coverage_25": 0.016527966037392615, "adaptive_weight/frontier_coverage_5": 0.016527966037392615, "adaptive_weight/frontier_ece_reward": 0.12394552230834961, "adaptive_weight/frontier_entropy_batch_reward": 0.19305209517478944, "calibration/aurc": 0.31895930979708764, "calibration/batch_distribution_entropy": 0.9628895716104496, "calibration/batch_entropy_100bins": 0.6962293359508783, "calibration/batch_entropy_10bins": 0.9628895716104496, "calibration/batch_entropy_50bins": 0.7978527193826969, "calibration/batch_uniqueness": 0.8968208650016194, "calibration/buffer_distribution_entropy": 0.948261085906984, "calibration/buffer_entropy_100bins": 0.6780566979310011, "calibration/buffer_entropy_10bins": 0.948261085906984, "calibration/buffer_entropy_50bins": 0.778304443963483, "calibration/confidence_entropy": 0.4647877319490812, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.08711778375733856, "calibration/coverage@15%": 0.19024660591976517, "calibration/coverage@20%": 0.28438723091976514, "calibration/coverage@25%": 0.39454348091976515, "calibration/coverage@30%": 0.4867539138943249, "calibration/coverage@5%": 0.0140625, "calibration/ece": 0.13742734787793545, "calibration/mean_confidence": 0.4927926374449608, "calibration/prompt_uniqueness": 0.7803107318223205, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1155.6, "completions/max_terminated_length": 555.8, "completions/mean_length": 191.6619140625, "completions/mean_terminated_length": 191.13605041503905, "completions/min_length": 76.8, "completions/min_terminated_length": 76.8, "epoch": 0.256, "grad_norm": 0.001474651973694563, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 269792230.0, "reward": 0.8049997210502624, "reward_std": 0.10463125705718994, "rewards/accuracy_reward": 0.5009765625, "rewards/brier_reward": 0.7641858220100403, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0030391468200832604, "rewards/frontier_coverage_1": 0.10821435190737247, "rewards/frontier_coverage_10": 0.10821435190737247, "rewards/frontier_coverage_15": 0.10821435190737247, "rewards/frontier_coverage_20": 0.10821435190737247, "rewards/frontier_coverage_25": 0.10821435190737247, "rewards/frontier_coverage_5": 0.10821435190737247, "rewards/frontier_ece_reward": 0.020147581398487092, "rewards/frontier_entropy_batch_reward": -0.0477634958922863, "signal/accuracy_reward/centered_abs_mean": 0.14019775390625, "signal/accuracy_reward/group_bin_occupancy": 0.18671875, "signal/accuracy_reward/group_std_mean": 0.17916424572467804, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.070098876953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.070098876953125, "signal/advantage_abs_mean": 0.08192529529333115, "signal/advantage_pre_scale_abs_mean": 0.08192529529333115, "signal/advantage_pre_scale_std": 0.12622717320919036, "signal/advantage_std": 0.12622717320919036, "signal/brier_reward/centered_abs_mean": 0.17578245997428893, "signal/brier_reward/group_bin_occupancy": 0.8453125, "signal/brier_reward/group_std_mean": 0.22181777954101561, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01164692472666502, "signal/brier_reward/weight": 0.06624611765146256, "signal/brier_reward/weighted_centered_abs_mean": 0.01164692472666502, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027812632266432046, "signal/frontier_aurc_reward/group_bin_occupancy": 0.729296875, "signal/frontier_aurc_reward/group_std_mean": 0.004300047783181072, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.974574912921526e-05, "signal/frontier_aurc_reward/weight": 0.01788846291601658, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.974574912921526e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2263425648212433, "signal/frontier_coverage_1/group_bin_occupancy": 0.875, "signal/frontier_coverage_1/group_std_mean": 0.28937026858329773, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_1/weight": 0.016527966037392615, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_10/centered_abs_mean": 0.2263425648212433, "signal/frontier_coverage_10/group_bin_occupancy": 0.875, "signal/frontier_coverage_10/group_std_mean": 0.28937026858329773, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_10/weight": 0.016527966037392615, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_15/centered_abs_mean": 0.2263425648212433, "signal/frontier_coverage_15/group_bin_occupancy": 0.875, "signal/frontier_coverage_15/group_std_mean": 0.28937026858329773, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_15/weight": 0.016527966037392615, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_20/centered_abs_mean": 0.2263425648212433, "signal/frontier_coverage_20/group_bin_occupancy": 0.875, "signal/frontier_coverage_20/group_std_mean": 0.28937026858329773, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_20/weight": 0.016527966037392615, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_25/centered_abs_mean": 0.2263425648212433, "signal/frontier_coverage_25/group_bin_occupancy": 0.875, "signal/frontier_coverage_25/group_std_mean": 0.28937026858329773, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_25/weight": 0.016527966037392615, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_5/centered_abs_mean": 0.2263425648212433, "signal/frontier_coverage_5/group_bin_occupancy": 0.875, "signal/frontier_coverage_5/group_std_mean": 0.28937026858329773, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_coverage_5/weight": 0.016527966037392615, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037405529990792276, "signal/frontier_ece_reward/centered_abs_mean": 0.04063420295715332, "signal/frontier_ece_reward/group_bin_occupancy": 0.70859375, "signal/frontier_ece_reward/group_std_mean": 0.05336618795990944, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005035725235939026, "signal/frontier_ece_reward/weight": 0.12394552230834961, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005035725235939026, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08213324025273323, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.59765625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10542523190379142, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015855711698532105, "signal/frontier_entropy_batch_reward/weight": 0.19305209517478944, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015855711698532105, "step": 80 }, { "adaptive_ema/accuracy_reward": 0.3397445289155506, "adaptive_ema/brier_reward": 0.5661552175485272, "adaptive_ema/format_reward": 0.8390983751782162, "adaptive_ema/frontier_aurc_reward": 0.1545018851710344, "adaptive_ema/frontier_coverage_1": 0.22056885627218503, "adaptive_ema/frontier_coverage_10": 0.22056885627218503, "adaptive_ema/frontier_coverage_15": 0.22056885627218503, "adaptive_ema/frontier_coverage_20": 0.22056885627218503, "adaptive_ema/frontier_coverage_25": 0.22056885627218503, "adaptive_ema/frontier_coverage_5": 0.22056885627218503, "adaptive_ema/frontier_ece_reward": 0.16177079064652503, "adaptive_ema/frontier_entropy_batch_reward": -0.28177420146855825, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.06489593386650086, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01811096929013729, "adaptive_weight/frontier_coverage_1": 0.01669577695429325, "adaptive_weight/frontier_coverage_10": 0.01669577695429325, "adaptive_weight/frontier_coverage_15": 0.01669577695429325, "adaptive_weight/frontier_coverage_20": 0.01669577695429325, "adaptive_weight/frontier_coverage_25": 0.01669577695429325, "adaptive_weight/frontier_coverage_5": 0.01669577695429325, "adaptive_weight/frontier_ece_reward": 0.1253859281539917, "adaptive_weight/frontier_entropy_batch_reward": 0.1917325049638748, "calibration/aurc": 0.3813991127365959, "calibration/batch_distribution_entropy": 0.9642356723108738, "calibration/batch_entropy_100bins": 0.6876133130751552, "calibration/batch_entropy_10bins": 0.9642356723108738, "calibration/batch_entropy_50bins": 0.7879933270345396, "calibration/batch_uniqueness": 0.8944073325171974, "calibration/buffer_distribution_entropy": 0.9506694534538565, "calibration/buffer_entropy_100bins": 0.6816668870467762, "calibration/buffer_entropy_10bins": 0.9506694534538565, "calibration/buffer_entropy_50bins": 0.7816761689320492, "calibration/confidence_entropy": 0.485735322011209, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.055078125, "calibration/coverage@15%": 0.1421875, "calibration/coverage@20%": 0.2015625, "calibration/coverage@25%": 0.2703125, "calibration/coverage@30%": 0.3711755442759296, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1416843897730325, "calibration/mean_confidence": 0.48195591092772727, "calibration/prompt_uniqueness": 0.7607490976196669, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1346.6, "completions/max_terminated_length": 528.0, "completions/mean_length": 199.40859375, "completions/mean_terminated_length": 198.88611450195313, "completions/min_length": 78.8, "completions/min_terminated_length": 78.8, "epoch": 0.272, "grad_norm": 0.001055843778885901, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 286799870.0, "reward": 0.8040920615196228, "reward_std": 0.0994871512055397, "rewards/accuracy_reward": 0.494921875, "rewards/brier_reward": 0.7610333204269409, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0030955026391893624, "rewards/frontier_coverage_1": 0.10903444737195969, "rewards/frontier_coverage_10": 0.10903444737195969, "rewards/frontier_coverage_15": 0.10903444737195969, "rewards/frontier_coverage_20": 0.10903444737195969, "rewards/frontier_coverage_25": 0.10903444737195969, "rewards/frontier_coverage_5": 0.10903444737195969, "rewards/frontier_ece_reward": 0.016309389285743235, "rewards/frontier_entropy_batch_reward": -0.028366550896316768, "signal/accuracy_reward/centered_abs_mean": 0.13192138671875, "signal/accuracy_reward/group_bin_occupancy": 0.183984375, "signal/accuracy_reward/group_std_mean": 0.16993003189563752, "signal/accuracy_reward/group_zero_std_frac": 0.528125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065960693359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.065960693359375, "signal/advantage_abs_mean": 0.0777826264500618, "signal/advantage_pre_scale_abs_mean": 0.0777826264500618, "signal/advantage_pre_scale_std": 0.1212904393672943, "signal/advantage_std": 0.1212904393672943, "signal/brier_reward/centered_abs_mean": 0.1730465292930603, "signal/brier_reward/group_bin_occupancy": 0.858984375, "signal/brier_reward/group_std_mean": 0.21734442710876464, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011229727230966092, "signal/brier_reward/weight": 0.06489593386650086, "signal/brier_reward/weighted_centered_abs_mean": 0.011229727230966092, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_bin_occupancy": 0.128125, "signal/format_reward/group_std_mean": 0.004419417306780815, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026437715161591767, "signal/frontier_aurc_reward/group_bin_occupancy": 0.744140625, "signal/frontier_aurc_reward/group_std_mean": 0.0040718474425375465, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.787545258295722e-05, "signal/frontier_aurc_reward/weight": 0.01811096929013729, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.787545258295722e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.23006677329540254, "signal/frontier_coverage_1/group_bin_occupancy": 0.890625, "signal/frontier_coverage_1/group_std_mean": 0.2924661636352539, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_1/weight": 0.01669577695429325, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_10/centered_abs_mean": 0.23006677329540254, "signal/frontier_coverage_10/group_bin_occupancy": 0.890625, "signal/frontier_coverage_10/group_std_mean": 0.2924661636352539, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_10/weight": 0.01669577695429325, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_15/centered_abs_mean": 0.23006677329540254, "signal/frontier_coverage_15/group_bin_occupancy": 0.890625, "signal/frontier_coverage_15/group_std_mean": 0.2924661636352539, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_15/weight": 0.01669577695429325, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_20/centered_abs_mean": 0.23006677329540254, "signal/frontier_coverage_20/group_bin_occupancy": 0.890625, "signal/frontier_coverage_20/group_std_mean": 0.2924661636352539, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_20/weight": 0.01669577695429325, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_25/centered_abs_mean": 0.23006677329540254, "signal/frontier_coverage_25/group_bin_occupancy": 0.890625, "signal/frontier_coverage_25/group_std_mean": 0.2924661636352539, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_25/weight": 0.01669577695429325, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_5/centered_abs_mean": 0.23006677329540254, "signal/frontier_coverage_5/group_bin_occupancy": 0.890625, "signal/frontier_coverage_5/group_std_mean": 0.2924661636352539, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_coverage_5/weight": 0.01669577695429325, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038412095978856086, "signal/frontier_ece_reward/centered_abs_mean": 0.03505429700016975, "signal/frontier_ece_reward/group_bin_occupancy": 0.70234375, "signal/frontier_ece_reward/group_std_mean": 0.04616514593362808, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004394615720957518, "signal/frontier_ece_reward/weight": 0.1253859281539917, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004394615720957518, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05788676589727402, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.588671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.07604653090238571, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011109101679176092, "signal/frontier_entropy_batch_reward/weight": 0.1917325049638748, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011109101679176092, "step": 85 }, { "adaptive_ema/accuracy_reward": 0.34651812406479604, "adaptive_ema/brier_reward": 0.5753681688750691, "adaptive_ema/format_reward": 0.84693925290468, "adaptive_ema/frontier_aurc_reward": 0.1467695300595397, "adaptive_ema/frontier_coverage_1": 0.21532816272022975, "adaptive_ema/frontier_coverage_10": 0.21532816272022975, "adaptive_ema/frontier_coverage_15": 0.21532816272022975, "adaptive_ema/frontier_coverage_20": 0.21532816272022975, "adaptive_ema/frontier_coverage_25": 0.21532816272022975, "adaptive_ema/frontier_coverage_5": 0.21532816272022975, "adaptive_ema/frontier_ece_reward": 0.15449289534769098, "adaptive_ema/frontier_entropy_batch_reward": -0.26877567477837994, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.06369541734457015, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.018327684327960014, "adaptive_weight/frontier_coverage_1": 0.016855016723275183, "adaptive_weight/frontier_coverage_10": 0.016855016723275183, "adaptive_weight/frontier_coverage_15": 0.016855016723275183, "adaptive_weight/frontier_coverage_20": 0.016855016723275183, "adaptive_weight/frontier_coverage_25": 0.016855016723275183, "adaptive_weight/frontier_coverage_5": 0.016855016723275183, "adaptive_weight/frontier_ece_reward": 0.1268280863761902, "adaptive_weight/frontier_entropy_batch_reward": 0.19031870663166045, "calibration/aurc": 0.37261471884124653, "calibration/batch_distribution_entropy": 0.9750497694198141, "calibration/batch_entropy_100bins": 0.6965538382719203, "calibration/batch_entropy_10bins": 0.9750497694198141, "calibration/batch_entropy_50bins": 0.8007210950717083, "calibration/batch_uniqueness": 0.9004279470884367, "calibration/buffer_distribution_entropy": 0.9530347440047425, "calibration/buffer_entropy_100bins": 0.6844022960456384, "calibration/buffer_entropy_10bins": 0.9530347440047425, "calibration/buffer_entropy_50bins": 0.7844857201320607, "calibration/confidence_entropy": 0.4903165946191407, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.015625, "calibration/coverage@15%": 0.050390625, "calibration/coverage@20%": 0.093359375, "calibration/coverage@25%": 0.149609375, "calibration/coverage@30%": 0.28918480919765166, "calibration/coverage@5%": 0.015625, "calibration/ece": 0.11769263418313192, "calibration/mean_confidence": 0.4960635546617686, "calibration/prompt_uniqueness": 0.7969746886381374, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1159.0, "completions/max_terminated_length": 560.0, "completions/mean_length": 193.3376953125, "completions/mean_terminated_length": 192.68275756835936, "completions/min_length": 83.4, "completions/min_terminated_length": 83.4, "epoch": 0.288, "grad_norm": 0.0019680638797581196, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 303737824.0, "reward": 0.8043852686882019, "reward_std": 0.10160344392061234, "rewards/accuracy_reward": 0.49619140625, "rewards/brier_reward": 0.7544908285140991, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0031197931617498397, "rewards/frontier_coverage_1": 0.10280315726995468, "rewards/frontier_coverage_10": 0.10280315726995468, "rewards/frontier_coverage_15": 0.10280315726995468, "rewards/frontier_coverage_20": 0.10280315726995468, "rewards/frontier_coverage_25": 0.10280315726995468, "rewards/frontier_coverage_5": 0.10280315726995468, "rewards/frontier_ece_reward": 0.014545264653861523, "rewards/frontier_entropy_batch_reward": -0.019750583730638028, "signal/accuracy_reward/centered_abs_mean": 0.139532470703125, "signal/accuracy_reward/group_bin_occupancy": 0.1890625, "signal/accuracy_reward/group_std_mean": 0.18288062512874603, "signal/accuracy_reward/group_zero_std_frac": 0.4875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0697662353515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0697662353515625, "signal/advantage_abs_mean": 0.07806335389614105, "signal/advantage_pre_scale_abs_mean": 0.07806335389614105, "signal/advantage_pre_scale_std": 0.12149789035320283, "signal/advantage_std": 0.12149789035320283, "signal/brier_reward/centered_abs_mean": 0.17784371674060823, "signal/brier_reward/group_bin_occupancy": 0.846484375, "signal/brier_reward/group_std_mean": 0.22363564372062683, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011328470706939698, "signal/brier_reward/weight": 0.06369541734457015, "signal/brier_reward/weighted_centered_abs_mean": 0.011328470706939698, "signal/format_reward/centered_abs_mean": 0.001312255859375, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0035306816454976795, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024917138274759055, "signal/frontier_aurc_reward/group_bin_occupancy": 0.736328125, "signal/frontier_aurc_reward/group_std_mean": 0.0037776767276227474, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.565996496239677e-05, "signal/frontier_aurc_reward/weight": 0.018327684327960014, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.565996496239677e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.23597144186496735, "signal/frontier_coverage_1/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_1/group_std_mean": 0.30281777381896974, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_1/weight": 0.016855016723275183, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_10/centered_abs_mean": 0.23597144186496735, "signal/frontier_coverage_10/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_10/group_std_mean": 0.30281777381896974, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_10/weight": 0.016855016723275183, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_15/centered_abs_mean": 0.23597144186496735, "signal/frontier_coverage_15/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_15/group_std_mean": 0.30281777381896974, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_15/weight": 0.016855016723275183, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_20/centered_abs_mean": 0.23597144186496735, "signal/frontier_coverage_20/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_20/group_std_mean": 0.30281777381896974, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_20/weight": 0.016855016723275183, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_25/centered_abs_mean": 0.23597144186496735, "signal/frontier_coverage_25/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_25/group_std_mean": 0.30281777381896974, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_25/weight": 0.016855016723275183, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_5/centered_abs_mean": 0.23597144186496735, "signal/frontier_coverage_5/group_bin_occupancy": 0.88515625, "signal/frontier_coverage_5/group_std_mean": 0.30281777381896974, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_coverage_5/weight": 0.016855016723275183, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039774681441485885, "signal/frontier_ece_reward/centered_abs_mean": 0.033656676113605496, "signal/frontier_ece_reward/group_bin_occupancy": 0.679296875, "signal/frontier_ece_reward/group_std_mean": 0.04492291808128357, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004268449451774359, "signal/frontier_ece_reward/weight": 0.1268280863761902, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004268449451774359, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.04934029281139374, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.593359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.06619658097624778, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009388817101716995, "signal/frontier_entropy_batch_reward/weight": 0.19031870663166045, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009388817101716995, "step": 90 }, { "adaptive_ema/accuracy_reward": 0.3547448487288868, "adaptive_ema/brier_reward": 0.583940797601032, "adaptive_ema/format_reward": 0.8544245276978717, "adaptive_ema/frontier_aurc_reward": 0.13943217269215363, "adaptive_ema/frontier_coverage_1": 0.20918630842104977, "adaptive_ema/frontier_coverage_10": 0.20918630842104977, "adaptive_ema/frontier_coverage_15": 0.20918630842104977, "adaptive_ema/frontier_coverage_20": 0.20918630842104977, "adaptive_ema/frontier_coverage_25": 0.20918630842104977, "adaptive_ema/frontier_coverage_5": 0.20918630842104977, "adaptive_ema/frontier_ece_reward": 0.14765743845246337, "adaptive_ema/frontier_entropy_batch_reward": -0.25743292047942407, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.06253615096211433, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.018522783368825912, "adaptive_weight/frontier_coverage_1": 0.01702139526605606, "adaptive_weight/frontier_coverage_10": 0.01702139526605606, "adaptive_weight/frontier_coverage_15": 0.01702139526605606, "adaptive_weight/frontier_coverage_20": 0.01702139526605606, "adaptive_weight/frontier_coverage_25": 0.01702139526605606, "adaptive_weight/frontier_coverage_5": 0.01702139526605606, "adaptive_weight/frontier_ece_reward": 0.1281127244234085, "adaptive_weight/frontier_entropy_batch_reward": 0.18899996280670167, "calibration/aurc": 0.31297741273515317, "calibration/batch_distribution_entropy": 0.9676274846939196, "calibration/batch_entropy_100bins": 0.7031356835430308, "calibration/batch_entropy_10bins": 0.9676274846939196, "calibration/batch_entropy_50bins": 0.8033018250346864, "calibration/batch_uniqueness": 0.8989820714622715, "calibration/buffer_distribution_entropy": 0.9552243834756542, "calibration/buffer_entropy_100bins": 0.687498391590213, "calibration/buffer_entropy_10bins": 0.9552243834756542, "calibration/buffer_entropy_50bins": 0.7874907977014229, "calibration/confidence_entropy": 0.48079966853899203, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0794138331702544, "calibration/coverage@15%": 0.16190909368884537, "calibration/coverage@20%": 0.21037869985322896, "calibration/coverage@25%": 0.38351807118395304, "calibration/coverage@30%": 0.5058372064579256, "calibration/coverage@5%": 0.0, "calibration/ece": 0.13226369381293737, "calibration/mean_confidence": 0.5072106308874051, "calibration/prompt_uniqueness": 0.7839407802744537, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1068.2, "completions/max_terminated_length": 860.4, "completions/mean_length": 194.43525390625, "completions/mean_terminated_length": 194.04138793945313, "completions/min_length": 82.6, "completions/min_terminated_length": 82.6, "epoch": 0.304, "grad_norm": 0.0018993834964931011, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 320658793.0, "reward": 0.8003029823303223, "reward_std": 0.09959358870983123, "rewards/accuracy_reward": 0.49970703125, "rewards/brier_reward": 0.7468193769454956, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003175217052921653, "rewards/frontier_coverage_1": 0.09870433807373047, "rewards/frontier_coverage_10": 0.09870433807373047, "rewards/frontier_coverage_15": 0.09870433807373047, "rewards/frontier_coverage_20": 0.09870433807373047, "rewards/frontier_coverage_25": 0.09870433807373047, "rewards/frontier_coverage_5": 0.09870433807373047, "rewards/frontier_ece_reward": 0.012848987244069576, "rewards/frontier_entropy_batch_reward": -0.041863073408603665, "signal/accuracy_reward/centered_abs_mean": 0.133673095703125, "signal/accuracy_reward/group_bin_occupancy": 0.18671875, "signal/accuracy_reward/group_std_mean": 0.1743601679801941, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0668365478515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0668365478515625, "signal/advantage_abs_mean": 0.07773177325725555, "signal/advantage_pre_scale_abs_mean": 0.07773177325725555, "signal/advantage_pre_scale_std": 0.11809686571359634, "signal/advantage_std": 0.11809686571359634, "signal/brier_reward/centered_abs_mean": 0.1809590458869934, "signal/brier_reward/group_bin_occupancy": 0.85234375, "signal/brier_reward/group_std_mean": 0.22669825851917266, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011314875073730945, "signal/brier_reward/weight": 0.06253615096211433, "signal/brier_reward/weighted_centered_abs_mean": 0.011314875073730945, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025466226506978273, "signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375, "signal/frontier_aurc_reward/group_std_mean": 0.0038951355498284103, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.718490381492302e-05, "signal/frontier_aurc_reward/weight": 0.018522783368825912, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.718490381492302e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.24377259016036987, "signal/frontier_coverage_1/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_1/group_std_mean": 0.31025264263153074, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_1/weight": 0.01702139526605606, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_10/centered_abs_mean": 0.24377259016036987, "signal/frontier_coverage_10/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_10/group_std_mean": 0.31025264263153074, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_10/weight": 0.01702139526605606, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_15/centered_abs_mean": 0.24377259016036987, "signal/frontier_coverage_15/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_15/group_std_mean": 0.31025264263153074, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_15/weight": 0.01702139526605606, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_20/centered_abs_mean": 0.24377259016036987, "signal/frontier_coverage_20/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_20/group_std_mean": 0.31025264263153074, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_20/weight": 0.01702139526605606, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_25/centered_abs_mean": 0.24377259016036987, "signal/frontier_coverage_25/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_25/group_std_mean": 0.31025264263153074, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_25/weight": 0.01702139526605606, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_5/centered_abs_mean": 0.24377259016036987, "signal/frontier_coverage_5/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_5/group_std_mean": 0.31025264263153074, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_coverage_5/weight": 0.01702139526605606, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004149554390460252, "signal/frontier_ece_reward/centered_abs_mean": 0.03277038559317589, "signal/frontier_ece_reward/group_bin_occupancy": 0.675, "signal/frontier_ece_reward/group_std_mean": 0.04338055402040482, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004198975954204797, "signal/frontier_ece_reward/weight": 0.1281127244234085, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004198975954204797, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07316073104739189, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.58046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09631870687007904, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013831710256636143, "signal/frontier_entropy_batch_reward/weight": 0.18899996280670167, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013831710256636143, "step": 95 }, { "adaptive_ema/accuracy_reward": 0.3615936058711512, "adaptive_ema/brier_reward": 0.5923757457391803, "adaptive_ema/format_reward": 0.8615380862676577, "adaptive_ema/frontier_aurc_reward": 0.13243977950769706, "adaptive_ema/frontier_coverage_1": 0.20445407942527577, "adaptive_ema/frontier_coverage_10": 0.20445407942527577, "adaptive_ema/frontier_coverage_15": 0.20445407942527577, "adaptive_ema/frontier_coverage_20": 0.20445407942527577, "adaptive_ema/frontier_coverage_25": 0.20445407942527577, "adaptive_ema/frontier_coverage_5": 0.20445407942527577, "adaptive_ema/frontier_ece_reward": 0.14114867916699728, "adaptive_ema/frontier_entropy_batch_reward": -0.24705242028494584, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.06140188053250313, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01871398314833641, "adaptive_weight/frontier_coverage_1": 0.01716057360172272, "adaptive_weight/frontier_coverage_10": 0.01716057360172272, "adaptive_weight/frontier_coverage_15": 0.01716057360172272, "adaptive_weight/frontier_coverage_20": 0.01716057360172272, "adaptive_weight/frontier_coverage_25": 0.01716057360172272, "adaptive_weight/frontier_coverage_5": 0.01716057360172272, "adaptive_weight/frontier_ece_reward": 0.12937237024307252, "adaptive_weight/frontier_entropy_batch_reward": 0.18784832060337067, "calibration/aurc": 0.26604642089660946, "calibration/batch_distribution_entropy": 0.9707172276846515, "calibration/batch_entropy_100bins": 0.7014501595964525, "calibration/batch_entropy_10bins": 0.9707172276846515, "calibration/batch_entropy_50bins": 0.801622235231511, "calibration/batch_uniqueness": 0.9017338094976054, "calibration/buffer_distribution_entropy": 0.9568832194697796, "calibration/buffer_entropy_100bins": 0.6906226373657561, "calibration/buffer_entropy_10bins": 0.9568832194697796, "calibration/buffer_entropy_50bins": 0.7903461780151347, "calibration/confidence_entropy": 0.4662896261667061, "calibration/coverage@0%": 0.006262230919765166, "calibration/coverage@1%": 0.006262230919765166, "calibration/coverage@10%": 0.15318386130136985, "calibration/coverage@15%": 0.3173610261741683, "calibration/coverage@20%": 0.4123524645303327, "calibration/coverage@25%": 0.5018713307240704, "calibration/coverage@30%": 0.5972449853228963, "calibration/coverage@5%": 0.016829745596868884, "calibration/ece": 0.14112387551981412, "calibration/mean_confidence": 0.5221430184381115, "calibration/prompt_uniqueness": 0.7770071865244537, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 950.6, "completions/max_terminated_length": 545.6, "completions/mean_length": 190.3484375, "completions/mean_terminated_length": 190.08496704101563, "completions/min_length": 82.8, "completions/min_terminated_length": 82.8, "epoch": 0.32, "grad_norm": 0.0008083516149781644, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 337696665.0, "reward": 0.807330322265625, "reward_std": 0.08853928595781327, "rewards/accuracy_reward": 0.51435546875, "rewards/brier_reward": 0.7678199291229248, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0029150643851608036, "rewards/frontier_coverage_1": 0.11148936003446579, "rewards/frontier_coverage_10": 0.11148936003446579, "rewards/frontier_coverage_15": 0.11148936003446579, "rewards/frontier_coverage_20": 0.11148936003446579, "rewards/frontier_coverage_25": 0.11148936003446579, "rewards/frontier_coverage_5": 0.11148936003446579, "rewards/frontier_ece_reward": 0.019013339094817637, "rewards/frontier_entropy_batch_reward": -0.057412856817245485, "signal/accuracy_reward/centered_abs_mean": 0.099041748046875, "signal/accuracy_reward/group_bin_occupancy": 0.180078125, "signal/accuracy_reward/group_std_mean": 0.14076800048351287, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495208740234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0495208740234375, "signal/advantage_abs_mean": 0.06596728339791298, "signal/advantage_pre_scale_abs_mean": 0.06596728339791298, "signal/advantage_pre_scale_std": 0.1075965479016304, "signal/advantage_std": 0.1075965479016304, "signal/brier_reward/centered_abs_mean": 0.1712301790714264, "signal/brier_reward/group_bin_occupancy": 0.833984375, "signal/brier_reward/group_std_mean": 0.21626271903514863, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010516363568603993, "signal/brier_reward/weight": 0.06140188053250313, "signal/brier_reward/weighted_centered_abs_mean": 0.010516363568603993, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026979228015989063, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71640625, "signal/frontier_aurc_reward/group_std_mean": 0.004028804274275899, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.047983067925088e-05, "signal/frontier_aurc_reward/weight": 0.01871398314833641, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.047983067925088e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2138041526079178, "signal/frontier_coverage_1/group_bin_occupancy": 0.853515625, "signal/frontier_coverage_1/group_std_mean": 0.27440894246101377, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_1/weight": 0.01716057360172272, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_10/centered_abs_mean": 0.2138041526079178, "signal/frontier_coverage_10/group_bin_occupancy": 0.853515625, "signal/frontier_coverage_10/group_std_mean": 0.27440894246101377, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_10/weight": 0.01716057360172272, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_15/centered_abs_mean": 0.2138041526079178, "signal/frontier_coverage_15/group_bin_occupancy": 0.853515625, "signal/frontier_coverage_15/group_std_mean": 0.27440894246101377, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_15/weight": 0.01716057360172272, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_20/centered_abs_mean": 0.2138041526079178, "signal/frontier_coverage_20/group_bin_occupancy": 0.853515625, "signal/frontier_coverage_20/group_std_mean": 0.27440894246101377, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_20/weight": 0.01716057360172272, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_25/centered_abs_mean": 0.2138041526079178, "signal/frontier_coverage_25/group_bin_occupancy": 0.853515625, "signal/frontier_coverage_25/group_std_mean": 0.27440894246101377, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_25/weight": 0.01716057360172272, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_5/centered_abs_mean": 0.2138041526079178, "signal/frontier_coverage_5/group_bin_occupancy": 0.853515625, "signal/frontier_coverage_5/group_std_mean": 0.27440894246101377, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_coverage_5/weight": 0.01716057360172272, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003668808238580823, "signal/frontier_ece_reward/centered_abs_mean": 0.035488611459732054, "signal/frontier_ece_reward/group_bin_occupancy": 0.655078125, "signal/frontier_ece_reward/group_std_mean": 0.045805665850639346, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004591076914221048, "signal/frontier_ece_reward/weight": 0.12937237024307252, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004591076914221048, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09487930536270142, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.58828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.12115364670753478, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0178196107968688, "signal/frontier_entropy_batch_reward/weight": 0.18784832060337067, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0178196107968688, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.5622950821720393, "eval_calibration/batch_distribution_entropy": 0.8914133677236195, "eval_calibration/batch_entropy_100bins": 0.5628644573084215, "eval_calibration/batch_entropy_10bins": 0.8914133677236195, "eval_calibration/batch_entropy_50bins": 0.6625949320728552, "eval_calibration/batch_uniqueness": 0.8203125, "eval_calibration/buffer_distribution_entropy": 0.9578292174133181, "eval_calibration/buffer_entropy_100bins": 0.692119963145951, "eval_calibration/buffer_entropy_10bins": 0.9578292174133181, "eval_calibration/buffer_entropy_50bins": 0.7917815965358247, "eval_calibration/confidence_entropy": 0.4636687518786541, "eval_calibration/coverage@0%": 0.015625, "eval_calibration/coverage@1%": 0.015625, "eval_calibration/coverage@10%": 0.015625, "eval_calibration/coverage@15%": 0.0703125, "eval_calibration/coverage@20%": 0.0703125, "eval_calibration/coverage@25%": 0.0859375, "eval_calibration/coverage@30%": 0.09375, "eval_calibration/coverage@5%": 0.015625, "eval_calibration/ece": 0.21585937500000002, "eval_calibration/mean_confidence": 0.406015625, "eval_calibration/prompt_uniqueness": 0.8203125, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 632.0, "eval_completions/max_terminated_length": 331.5, "eval_completions/mean_length": 192.6338233947754, "eval_completions/mean_terminated_length": 190.00860977172852, "eval_completions/min_length": 95.5, "eval_completions/min_terminated_length": 95.5, "eval_loss": 0.0, "eval_num_tokens": 337696665.0, "eval_reward": 0.7206540256738663, "eval_reward_std": 0.23302211984992027, "eval_rewards/accuracy_reward": 0.41015625, "eval_rewards/brier_reward": 0.7707347571849823, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.0035720185842365026, "eval_rewards/frontier_coverage_1": 0.19156523048877716, "eval_rewards/frontier_coverage_10": 0.19156523048877716, "eval_rewards/frontier_coverage_15": 0.19156523048877716, "eval_rewards/frontier_coverage_20": 0.19156523048877716, "eval_rewards/frontier_coverage_25": 0.19156523048877716, "eval_rewards/frontier_coverage_5": 0.19156523048877716, "eval_rewards/frontier_ece_reward": 0.013612536480650306, "eval_rewards/frontier_entropy_batch_reward": -0.2766956575214863, "eval_runtime": 28.3301, "eval_samples_per_second": 17.649, "eval_signal/accuracy_reward/centered_abs_mean": 0.474853515625, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49471620470285416, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2374267578125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2374267578125, "eval_signal/advantage_abs_mean": 0.2067079357802868, "eval_signal/advantage_pre_scale_abs_mean": 0.2067079357802868, "eval_signal/advantage_pre_scale_std": 0.2311190329492092, "eval_signal/advantage_std": 0.2311190329492092, "eval_signal/brier_reward/centered_abs_mean": 0.21524429693818092, "eval_signal/brier_reward/group_bin_occupancy": 0.8984375, "eval_signal/brier_reward/group_std_mean": 0.26688605546951294, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013107528910040855, "eval_signal/brier_reward/weight": 0.06089605763554573, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.013107528910040855, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_bin_occupancy": 0.1328125, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003758925129659474, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.78125, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0061155634466558695, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.062661461532116e-05, "eval_signal/frontier_aurc_reward/weight": 0.01878904551267624, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.062661461532116e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3889065384864807, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_1/group_std_mean": 0.47960302233695984, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_1/weight": 0.017216749489307404, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3889065384864807, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_10/group_std_mean": 0.47960302233695984, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_10/weight": 0.017216749489307404, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3889065384864807, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_15/group_std_mean": 0.47960302233695984, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_15/weight": 0.017216749489307404, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3889065384864807, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_20/group_std_mean": 0.47960302233695984, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_20/weight": 0.017216749489307404, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3889065384864807, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_25/group_std_mean": 0.47960302233695984, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_25/weight": 0.017216749489307404, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3889065384864807, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_5/group_std_mean": 0.47960302233695984, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_coverage_5/weight": 0.017216749489307404, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006695706397294998, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.03698669094592333, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8046875, "eval_signal/frontier_ece_reward/group_std_mean": 0.055258942767977715, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004802606999874115, "eval_signal/frontier_ece_reward/weight": 0.12984690070152283, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004802606999874115, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3310266584157944, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.6015625, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.38709257543087006, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.062056735157966614, "eval_signal/frontier_entropy_batch_reward/weight": 0.18746748566627502, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.062056735157966614, "eval_steps_per_second": 0.141, "step": 100 }, { "adaptive_ema/accuracy_reward": 0.3691240030507804, "adaptive_ema/brier_reward": 0.6010773515502917, "adaptive_ema/format_reward": 0.8683059009886822, "adaptive_ema/frontier_aurc_reward": 0.1258143913355397, "adaptive_ema/frontier_coverage_1": 0.19983967871364694, "adaptive_ema/frontier_coverage_10": 0.19983967871364694, "adaptive_ema/frontier_coverage_15": 0.19983967871364694, "adaptive_ema/frontier_coverage_20": 0.19983967871364694, "adaptive_ema/frontier_coverage_25": 0.19983967871364694, "adaptive_ema/frontier_coverage_5": 0.19983967871364694, "adaptive_ema/frontier_ece_reward": 0.13516118757960247, "adaptive_ema/frontier_entropy_batch_reward": -0.23766472191903443, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.06022153198719025, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01889778971672058, "adaptive_weight/frontier_coverage_1": 0.017297543585300446, "adaptive_weight/frontier_coverage_10": 0.017297543585300446, "adaptive_weight/frontier_coverage_15": 0.017297543585300446, "adaptive_weight/frontier_coverage_20": 0.017297543585300446, "adaptive_weight/frontier_coverage_25": 0.017297543585300446, "adaptive_weight/frontier_coverage_5": 0.017297543585300446, "adaptive_weight/frontier_ece_reward": 0.1305568039417267, "adaptive_weight/frontier_entropy_batch_reward": 0.18683860898017884, "calibration/aurc": 0.3309582655250696, "calibration/batch_distribution_entropy": 0.9672548223786464, "calibration/batch_entropy_100bins": 0.6964700945293234, "calibration/batch_entropy_10bins": 0.9672548223786464, "calibration/batch_entropy_50bins": 0.7965249397583692, "calibration/batch_uniqueness": 0.8961151123046875, "calibration/buffer_distribution_entropy": 0.9592387952294402, "calibration/buffer_entropy_100bins": 0.6936079623072494, "calibration/buffer_entropy_10bins": 0.9592387952294402, "calibration/buffer_entropy_50bins": 0.7931674661918849, "calibration/confidence_entropy": 0.48189768326767374, "calibration/coverage@0%": 0.005859375, "calibration/coverage@1%": 0.005859375, "calibration/coverage@10%": 0.006640625, "calibration/coverage@15%": 0.081640625, "calibration/coverage@20%": 0.177734375, "calibration/coverage@25%": 0.26640625, "calibration/coverage@30%": 0.430859375, "calibration/coverage@5%": 0.005859375, "calibration/ece": 0.13445703125000003, "calibration/mean_confidence": 0.47684765625000003, "calibration/prompt_uniqueness": 0.775341796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 749.4, "completions/max_terminated_length": 543.6, "completions/mean_length": 189.60478515625, "completions/mean_terminated_length": 189.47372741699218, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.336, "grad_norm": 0.000987388426437974, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 354360650.0, "reward": 0.80974360704422, "reward_std": 0.09494156986474991, "rewards/accuracy_reward": 0.52021484375, "rewards/brier_reward": 0.7602529168128968, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0028106594923883676, "rewards/frontier_coverage_1": 0.09387856647372246, "rewards/frontier_coverage_10": 0.09387856647372246, "rewards/frontier_coverage_15": 0.09387856647372246, "rewards/frontier_coverage_20": 0.09387856647372246, "rewards/frontier_coverage_25": 0.09387856647372246, "rewards/frontier_coverage_5": 0.09387856647372246, "rewards/frontier_ece_reward": 0.016277409344911575, "rewards/frontier_entropy_batch_reward": -0.04225642457604408, "signal/accuracy_reward/centered_abs_mean": 0.124945068359375, "signal/accuracy_reward/group_bin_occupancy": 0.18671875, "signal/accuracy_reward/group_std_mean": 0.16898487508296967, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0624725341796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0624725341796875, "signal/advantage_abs_mean": 0.07221008986234664, "signal/advantage_pre_scale_abs_mean": 0.07221008986234664, "signal/advantage_pre_scale_std": 0.11600485146045685, "signal/advantage_std": 0.11600485146045685, "signal/brier_reward/centered_abs_mean": 0.16997200548648833, "signal/brier_reward/group_bin_occupancy": 0.84453125, "signal/brier_reward/group_std_mean": 0.2162840783596039, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01023493316024542, "signal/brier_reward/weight": 0.06022153198719025, "signal/brier_reward/weighted_centered_abs_mean": 0.01023493316024542, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002428735839203, "signal/frontier_aurc_reward/group_bin_occupancy": 0.728515625, "signal/frontier_aurc_reward/group_std_mean": 0.0036500558257102967, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5901389239588755e-05, "signal/frontier_aurc_reward/weight": 0.01889778971672058, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5901389239588755e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22394213676452637, "signal/frontier_coverage_1/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_1/group_std_mean": 0.28910828232765196, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_1/weight": 0.017297543585300446, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_10/centered_abs_mean": 0.22394213676452637, "signal/frontier_coverage_10/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_10/group_std_mean": 0.28910828232765196, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_10/weight": 0.017297543585300446, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_15/centered_abs_mean": 0.22394213676452637, "signal/frontier_coverage_15/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_15/group_std_mean": 0.28910828232765196, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_15/weight": 0.017297543585300446, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_20/centered_abs_mean": 0.22394213676452637, "signal/frontier_coverage_20/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_20/group_std_mean": 0.28910828232765196, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_20/weight": 0.017297543585300446, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_25/centered_abs_mean": 0.22394213676452637, "signal/frontier_coverage_25/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_25/group_std_mean": 0.28910828232765196, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_25/weight": 0.017297543585300446, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_5/centered_abs_mean": 0.22394213676452637, "signal/frontier_coverage_5/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_5/group_std_mean": 0.28910828232765196, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_coverage_5/weight": 0.017297543585300446, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038735364098101853, "signal/frontier_ece_reward/centered_abs_mean": 0.029718470200896262, "signal/frontier_ece_reward/group_bin_occupancy": 0.66875, "signal/frontier_ece_reward/group_std_mean": 0.03931205943226814, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003879939578473568, "signal/frontier_ece_reward/weight": 0.1305568039417267, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003879939578473568, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07130372412502765, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.603515625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09097694158554077, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013328920677304268, "signal/frontier_entropy_batch_reward/weight": 0.18683860898017884, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013328920677304268, "step": 105 }, { "adaptive_ema/accuracy_reward": 0.37519145558631123, "adaptive_ema/brier_reward": 0.609011320573811, "adaptive_ema/format_reward": 0.8747411738981095, "adaptive_ema/frontier_aurc_reward": 0.11950125783945201, "adaptive_ema/frontier_coverage_1": 0.19560125560997757, "adaptive_ema/frontier_coverage_10": 0.19560125560997757, "adaptive_ema/frontier_coverage_15": 0.19560125560997757, "adaptive_ema/frontier_coverage_20": 0.19560125560997757, "adaptive_ema/frontier_coverage_25": 0.19560125560997757, "adaptive_ema/frontier_coverage_5": 0.19560125560997757, "adaptive_ema/frontier_ece_reward": 0.12916943505117517, "adaptive_ema/frontier_entropy_batch_reward": -0.22836366793030613, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.05914327949285507, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.019072819501161575, "adaptive_weight/frontier_coverage_1": 0.017424381524324416, "adaptive_weight/frontier_coverage_10": 0.017424381524324416, "adaptive_weight/frontier_coverage_15": 0.017424381524324416, "adaptive_weight/frontier_coverage_20": 0.017424381524324416, "adaptive_weight/frontier_coverage_25": 0.017424381524324416, "adaptive_weight/frontier_coverage_5": 0.017424381524324416, "adaptive_weight/frontier_ece_reward": 0.13172760903835296, "adaptive_weight/frontier_entropy_batch_reward": 0.18580999970436096, "calibration/aurc": 0.3588262650357728, "calibration/batch_distribution_entropy": 0.9422539276380764, "calibration/batch_entropy_100bins": 0.6798601217797626, "calibration/batch_entropy_10bins": 0.9422539276380764, "calibration/batch_entropy_50bins": 0.7800342465320881, "calibration/batch_uniqueness": 0.8849484989114356, "calibration/buffer_distribution_entropy": 0.9623683344824145, "calibration/buffer_entropy_100bins": 0.6978108431612698, "calibration/buffer_entropy_10bins": 0.9623683344824145, "calibration/buffer_entropy_50bins": 0.7969227700066659, "calibration/confidence_entropy": 0.4530703545110395, "calibration/coverage@0%": 0.003910836594911937, "calibration/coverage@1%": 0.003910836594911937, "calibration/coverage@10%": 0.05782625978473581, "calibration/coverage@15%": 0.1914291829745597, "calibration/coverage@20%": 0.299706457925636, "calibration/coverage@25%": 0.3837137659001957, "calibration/coverage@30%": 0.43528238136007824, "calibration/coverage@5%": 0.003910836594911937, "calibration/ece": 0.13784669508550568, "calibration/mean_confidence": 0.40941150367161133, "calibration/prompt_uniqueness": 0.7607474717091571, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 542.8, "completions/max_terminated_length": 542.8, "completions/mean_length": 191.31513671875, "completions/mean_terminated_length": 191.31513671875, "completions/min_length": 82.6, "completions/min_terminated_length": 82.6, "epoch": 0.352, "grad_norm": 0.0009676101035438478, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 371580133.0, "reward": 0.7830458164215088, "reward_std": 0.0892082542181015, "rewards/accuracy_reward": 0.46435546875, "rewards/brier_reward": 0.7690168023109436, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003099285624921322, "rewards/frontier_coverage_1": 0.13656647205352784, "rewards/frontier_coverage_10": 0.13656647205352784, "rewards/frontier_coverage_15": 0.13656647205352784, "rewards/frontier_coverage_20": 0.13656647205352784, "rewards/frontier_coverage_25": 0.13656647205352784, "rewards/frontier_coverage_5": 0.13656647205352784, "rewards/frontier_ece_reward": 0.011311782151460647, "rewards/frontier_entropy_batch_reward": -0.05575864017009735, "signal/accuracy_reward/centered_abs_mean": 0.110345458984375, "signal/accuracy_reward/group_bin_occupancy": 0.17734375, "signal/accuracy_reward/group_std_mean": 0.14654679000377654, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0551727294921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0551727294921875, "signal/advantage_abs_mean": 0.06846952587366104, "signal/advantage_pre_scale_abs_mean": 0.06846952587366104, "signal/advantage_pre_scale_std": 0.10927639603614807, "signal/advantage_std": 0.10927639603614807, "signal/brier_reward/centered_abs_mean": 0.16186352968215942, "signal/brier_reward/group_bin_occupancy": 0.84140625, "signal/brier_reward/group_std_mean": 0.20741929709911347, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00957336314022541, "signal/brier_reward/weight": 0.05914327949285507, "signal/brier_reward/weighted_centered_abs_mean": 0.00957336314022541, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025628196075558663, "signal/frontier_aurc_reward/group_bin_occupancy": 0.703125, "signal/frontier_aurc_reward/group_std_mean": 0.004089434165507555, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.888393232249655e-05, "signal/frontier_aurc_reward/weight": 0.019072819501161575, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.888393232249655e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2231020450592041, "signal/frontier_coverage_1/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_1/group_std_mean": 0.28328863382339475, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_1/weight": 0.017424381524324416, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_10/centered_abs_mean": 0.2231020450592041, "signal/frontier_coverage_10/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_10/group_std_mean": 0.28328863382339475, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_10/weight": 0.017424381524324416, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_15/centered_abs_mean": 0.2231020450592041, "signal/frontier_coverage_15/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_15/group_std_mean": 0.28328863382339475, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_15/weight": 0.017424381524324416, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_20/centered_abs_mean": 0.2231020450592041, "signal/frontier_coverage_20/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_20/group_std_mean": 0.28328863382339475, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_20/weight": 0.017424381524324416, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_25/centered_abs_mean": 0.2231020450592041, "signal/frontier_coverage_25/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_25/group_std_mean": 0.28328863382339475, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_25/weight": 0.017424381524324416, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_5/centered_abs_mean": 0.2231020450592041, "signal/frontier_coverage_5/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_5/group_std_mean": 0.28328863382339475, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_coverage_5/weight": 0.017424381524324416, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038873269222676754, "signal/frontier_ece_reward/centered_abs_mean": 0.022969850897789003, "signal/frontier_ece_reward/group_bin_occupancy": 0.691015625, "signal/frontier_ece_reward/group_std_mean": 0.030405202880501747, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030254576820880176, "signal/frontier_ece_reward/weight": 0.13172760903835296, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030254576820880176, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08273435607552529, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.580859375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1064729444682598, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01537502845749259, "signal/frontier_entropy_batch_reward/weight": 0.18580999970436096, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01537502845749259, "step": 110 }, { "adaptive_ema/accuracy_reward": 0.3808226709134879, "adaptive_ema/brier_reward": 0.6169466494056433, "adaptive_ema/format_reward": 0.8808724066231509, "adaptive_ema/frontier_aurc_reward": 0.11349249292544246, "adaptive_ema/frontier_coverage_1": 0.19190069321476277, "adaptive_ema/frontier_coverage_10": 0.19190069321476277, "adaptive_ema/frontier_coverage_15": 0.19190069321476277, "adaptive_ema/frontier_coverage_20": 0.19190069321476277, "adaptive_ema/frontier_coverage_25": 0.19190069321476277, "adaptive_ema/frontier_coverage_5": 0.19190069321476277, "adaptive_ema/frontier_ece_reward": 0.12340020382279851, "adaptive_ema/frontier_entropy_batch_reward": -0.21959110652164848, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.05806405767798424, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.019243112578988076, "adaptive_weight/frontier_coverage_1": 0.017541125789284705, "adaptive_weight/frontier_coverage_10": 0.017541125789284705, "adaptive_weight/frontier_coverage_15": 0.017541125789284705, "adaptive_weight/frontier_coverage_20": 0.017541125789284705, "adaptive_weight/frontier_coverage_25": 0.017541125789284705, "adaptive_weight/frontier_coverage_5": 0.017541125789284705, "adaptive_weight/frontier_ece_reward": 0.13287744224071502, "adaptive_weight/frontier_entropy_batch_reward": 0.1848686307668686, "calibration/aurc": 0.3854856134301831, "calibration/batch_distribution_entropy": 0.956228883497219, "calibration/batch_entropy_100bins": 0.6788869303045917, "calibration/batch_entropy_10bins": 0.956228883497219, "calibration/batch_entropy_50bins": 0.7826528516805218, "calibration/batch_uniqueness": 0.8898590087890625, "calibration/buffer_distribution_entropy": 0.9632650275282651, "calibration/buffer_entropy_100bins": 0.6999079540647883, "calibration/buffer_entropy_10bins": 0.9632650275282651, "calibration/buffer_entropy_50bins": 0.7985352523732561, "calibration/confidence_entropy": 0.4754072231936579, "calibration/coverage@0%": 0.004296875, "calibration/coverage@1%": 0.004296875, "calibration/coverage@10%": 0.004296875, "calibration/coverage@15%": 0.06640625, "calibration/coverage@20%": 0.1453125, "calibration/coverage@25%": 0.24609375, "calibration/coverage@30%": 0.3421875, "calibration/coverage@5%": 0.004296875, "calibration/ece": 0.14345156250000002, "calibration/mean_confidence": 0.4806421875, "calibration/prompt_uniqueness": 0.755712890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 709.2, "completions/max_terminated_length": 534.6, "completions/mean_length": 192.16748046875, "completions/mean_terminated_length": 191.9038848876953, "completions/min_length": 85.2, "completions/min_terminated_length": 85.2, "epoch": 0.368, "grad_norm": 0.0009233996388502419, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 388613400.0, "reward": 0.7953165411949158, "reward_std": 0.08573707342147827, "rewards/accuracy_reward": 0.49169921875, "rewards/brier_reward": 0.765993058681488, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003442125115543604, "rewards/frontier_coverage_1": 0.11713624447584152, "rewards/frontier_coverage_10": 0.11713624447584152, "rewards/frontier_coverage_15": 0.11713624447584152, "rewards/frontier_coverage_20": 0.11713624447584152, "rewards/frontier_coverage_25": 0.11713624447584152, "rewards/frontier_coverage_5": 0.11713624447584152, "rewards/frontier_ece_reward": 0.0102972861379385, "rewards/frontier_entropy_batch_reward": -0.0469327487051487, "signal/accuracy_reward/centered_abs_mean": 0.108538818359375, "signal/accuracy_reward/group_bin_occupancy": 0.1765625, "signal/accuracy_reward/group_std_mean": 0.14366440922021867, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0542694091796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0542694091796875, "signal/advantage_abs_mean": 0.06664566695690155, "signal/advantage_pre_scale_abs_mean": 0.06664566695690155, "signal/advantage_pre_scale_std": 0.10758482664823532, "signal/advantage_std": 0.10758482664823532, "signal/brier_reward/centered_abs_mean": 0.1609862267971039, "signal/brier_reward/group_bin_occupancy": 0.849609375, "signal/brier_reward/group_std_mean": 0.20346853733062745, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.009347083792090417, "signal/brier_reward/weight": 0.05806405767798424, "signal/brier_reward/weighted_centered_abs_mean": 0.009347083792090417, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030787172727286816, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7125, "signal/frontier_aurc_reward/group_std_mean": 0.004707864206284285, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.925965087953955e-05, "signal/frontier_aurc_reward/weight": 0.019243112578988076, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.925965087953955e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20927395224571227, "signal/frontier_coverage_1/group_bin_occupancy": 0.86875, "signal/frontier_coverage_1/group_std_mean": 0.2662800371646881, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_1/weight": 0.017541125789284705, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_10/centered_abs_mean": 0.20927395224571227, "signal/frontier_coverage_10/group_bin_occupancy": 0.86875, "signal/frontier_coverage_10/group_std_mean": 0.2662800371646881, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_10/weight": 0.017541125789284705, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_15/centered_abs_mean": 0.20927395224571227, "signal/frontier_coverage_15/group_bin_occupancy": 0.86875, "signal/frontier_coverage_15/group_std_mean": 0.2662800371646881, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_15/weight": 0.017541125789284705, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_20/centered_abs_mean": 0.20927395224571227, "signal/frontier_coverage_20/group_bin_occupancy": 0.86875, "signal/frontier_coverage_20/group_std_mean": 0.2662800371646881, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_20/weight": 0.017541125789284705, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_25/centered_abs_mean": 0.20927395224571227, "signal/frontier_coverage_25/group_bin_occupancy": 0.86875, "signal/frontier_coverage_25/group_std_mean": 0.2662800371646881, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_25/weight": 0.017541125789284705, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_5/centered_abs_mean": 0.20927395224571227, "signal/frontier_coverage_5/group_bin_occupancy": 0.86875, "signal/frontier_coverage_5/group_std_mean": 0.2662800371646881, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_coverage_5/weight": 0.017541125789284705, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036708842031657695, "signal/frontier_ece_reward/centered_abs_mean": 0.019026529788970948, "signal/frontier_ece_reward/group_bin_occupancy": 0.713671875, "signal/frontier_ece_reward/group_std_mean": 0.02441370189189911, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025277684442698957, "signal/frontier_ece_reward/weight": 0.13287744224071502, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025277684442698957, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07778175473213196, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10077935457229614, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014381194859743119, "signal/frontier_entropy_batch_reward/weight": 0.1848686307668686, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014381194859743119, "step": 115 }, { "adaptive_ema/accuracy_reward": 0.38691835656165113, "adaptive_ema/brier_reward": 0.6242956807223472, "adaptive_ema/format_reward": 0.886702190326939, "adaptive_ema/frontier_aurc_reward": 0.10776341570590438, "adaptive_ema/frontier_coverage_1": 0.18755842962724928, "adaptive_ema/frontier_coverage_10": 0.18755842962724928, "adaptive_ema/frontier_coverage_15": 0.18755842962724928, "adaptive_ema/frontier_coverage_20": 0.18755842962724928, "adaptive_ema/frontier_coverage_25": 0.187511311432699, "adaptive_ema/frontier_coverage_5": 0.18755842962724928, "adaptive_ema/frontier_ece_reward": 0.117820152691086, "adaptive_ema/frontier_entropy_batch_reward": -0.21149949806803367, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.057041678577661514, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01939864121377468, "adaptive_weight/frontier_coverage_1": 0.017663762718439103, "adaptive_weight/frontier_coverage_10": 0.017663762718439103, "adaptive_weight/frontier_coverage_15": 0.017663762718439103, "adaptive_weight/frontier_coverage_20": 0.017663762718439103, "adaptive_weight/frontier_coverage_25": 0.01766478829085827, "adaptive_weight/frontier_coverage_5": 0.017663762718439103, "adaptive_weight/frontier_ece_reward": 0.1339384913444519, "adaptive_weight/frontier_entropy_batch_reward": 0.18393758237361907, "calibration/aurc": 0.35493774799708466, "calibration/batch_distribution_entropy": 0.9491989441612088, "calibration/batch_entropy_100bins": 0.6622287773312955, "calibration/batch_entropy_10bins": 0.9491989441612088, "calibration/batch_entropy_50bins": 0.7679444207624779, "calibration/batch_uniqueness": 0.8827301025390625, "calibration/buffer_distribution_entropy": 0.9636707005952015, "calibration/buffer_entropy_100bins": 0.7013720619319166, "calibration/buffer_entropy_10bins": 0.9636707005952015, "calibration/buffer_entropy_50bins": 0.799605255599517, "calibration/confidence_entropy": 0.4570547443173941, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.123046875, "calibration/coverage@15%": 0.201953125, "calibration/coverage@20%": 0.251171875, "calibration/coverage@25%": 0.29296875, "calibration/coverage@30%": 0.360546875, "calibration/coverage@5%": 0.028515625, "calibration/ece": 0.134556640625, "calibration/mean_confidence": 0.445193359375, "calibration/prompt_uniqueness": 0.736328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 959.6, "completions/max_terminated_length": 539.4, "completions/mean_length": 191.00859375, "completions/mean_terminated_length": 190.7458068847656, "completions/min_length": 85.6, "completions/min_terminated_length": 85.6, "epoch": 0.384, "grad_norm": 0.0009219791973009706, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 405425840.0, "reward": 0.8066781878471374, "reward_std": 0.0858098804950714, "rewards/accuracy_reward": 0.51943359375, "rewards/brier_reward": 0.7786778092384339, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0029779852367937563, "rewards/frontier_coverage_1": 0.10454831868410111, "rewards/frontier_coverage_10": 0.10454831868410111, "rewards/frontier_coverage_15": 0.10454831868410111, "rewards/frontier_coverage_20": 0.10454831868410111, "rewards/frontier_coverage_25": 0.10010432302951813, "rewards/frontier_coverage_5": 0.10454831868410111, "rewards/frontier_ece_reward": 0.010108662210404873, "rewards/frontier_entropy_batch_reward": -0.05299887377768755, "signal/accuracy_reward/centered_abs_mean": 0.103521728515625, "signal/accuracy_reward/group_bin_occupancy": 0.1796875, "signal/accuracy_reward/group_std_mean": 0.1436680018901825, "signal/accuracy_reward/group_zero_std_frac": 0.5625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0517608642578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0517608642578125, "signal/advantage_abs_mean": 0.06451480388641358, "signal/advantage_pre_scale_abs_mean": 0.06451480388641358, "signal/advantage_pre_scale_std": 0.10583080053329467, "signal/advantage_std": 0.10583080053329467, "signal/brier_reward/centered_abs_mean": 0.15401501059532166, "signal/brier_reward/group_bin_occupancy": 0.82265625, "signal/brier_reward/group_std_mean": 0.19831812977790833, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008785775676369668, "signal/brier_reward/weight": 0.057041678577661514, "signal/brier_reward/weighted_centered_abs_mean": 0.008785775676369668, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030430202838033437, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6984375, "signal/frontier_aurc_reward/group_std_mean": 0.004862629622220993, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.901477925362997e-05, "signal/frontier_aurc_reward/weight": 0.01939864121377468, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.901477925362997e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20419052243232727, "signal/frontier_coverage_1/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_1/group_std_mean": 0.2636923313140869, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_1/weight": 0.017663762718439103, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_10/centered_abs_mean": 0.20419052243232727, "signal/frontier_coverage_10/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_10/group_std_mean": 0.2636923313140869, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_10/weight": 0.017663762718439103, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_15/centered_abs_mean": 0.20419052243232727, "signal/frontier_coverage_15/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_15/group_std_mean": 0.2636923313140869, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_15/weight": 0.017663762718439103, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_20/centered_abs_mean": 0.20419052243232727, "signal/frontier_coverage_20/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_20/group_std_mean": 0.2636923313140869, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_20/weight": 0.017663762718439103, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_25/centered_abs_mean": 0.1982475072145462, "signal/frontier_coverage_25/group_bin_occupancy": 0.85703125, "signal/frontier_coverage_25/group_std_mean": 0.25631812810897825, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003502086503431201, "signal/frontier_coverage_25/weight": 0.01766478829085827, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003502086503431201, "signal/frontier_coverage_5/centered_abs_mean": 0.20419052243232727, "signal/frontier_coverage_5/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_5/group_std_mean": 0.2636923313140869, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_coverage_5/weight": 0.017663762718439103, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036070624366402624, "signal/frontier_ece_reward/centered_abs_mean": 0.015199101902544499, "signal/frontier_ece_reward/group_bin_occupancy": 0.739453125, "signal/frontier_ece_reward/group_std_mean": 0.019395126774907113, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020354266278445722, "signal/frontier_ece_reward/weight": 0.1339384913444519, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020354266278445722, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07814936712384224, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.619921875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09951084926724434, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014378698635846376, "signal/frontier_entropy_batch_reward/weight": 0.18393758237361907, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014378698635846376, "step": 120 }, { "adaptive_ema/accuracy_reward": 0.39235090036761877, "adaptive_ema/brier_reward": 0.6316182781002241, "adaptive_ema/format_reward": 0.892235737098863, "adaptive_ema/frontier_aurc_reward": 0.10232412924196503, "adaptive_ema/frontier_coverage_1": 0.18421709359957616, "adaptive_ema/frontier_coverage_10": 0.18421709359957616, "adaptive_ema/frontier_coverage_15": 0.18421709359957616, "adaptive_ema/frontier_coverage_20": 0.18421709359957616, "adaptive_ema/frontier_coverage_25": 0.18374265269248752, "adaptive_ema/frontier_coverage_5": 0.18421709359957616, "adaptive_ema/frontier_ece_reward": 0.11247543002688846, "adaptive_ema/frontier_entropy_batch_reward": -0.2030708460654071, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.056043879687786104, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.019556624069809913, "adaptive_weight/frontier_coverage_1": 0.01777251176536083, "adaptive_weight/frontier_coverage_10": 0.01777251176536083, "adaptive_weight/frontier_coverage_15": 0.01777251176536083, "adaptive_weight/frontier_coverage_20": 0.01777251176536083, "adaptive_weight/frontier_coverage_25": 0.017782849073410035, "adaptive_weight/frontier_coverage_5": 0.01777251176536083, "adaptive_weight/frontier_ece_reward": 0.135024231672287, "adaptive_weight/frontier_entropy_batch_reward": 0.18302985429763793, "calibration/aurc": 0.4509882354254221, "calibration/batch_distribution_entropy": 0.9564365076653628, "calibration/batch_entropy_100bins": 0.6665790545329219, "calibration/batch_entropy_10bins": 0.9564365076653628, "calibration/batch_entropy_50bins": 0.7725879728895253, "calibration/batch_uniqueness": 0.887788942993016, "calibration/buffer_distribution_entropy": 0.9652585082100484, "calibration/buffer_entropy_100bins": 0.7032827252388143, "calibration/buffer_entropy_10bins": 0.9652585082100484, "calibration/buffer_entropy_50bins": 0.8014792047520682, "calibration/confidence_entropy": 0.4920077470967188, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.015234375, "calibration/coverage@30%": 0.0869515931372549, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16312596060684545, "calibration/mean_confidence": 0.4642518884779939, "calibration/prompt_uniqueness": 0.7708412783902041, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 905.4, "completions/max_terminated_length": 519.4, "completions/mean_length": 191.51484375, "completions/mean_terminated_length": 191.1214630126953, "completions/min_length": 83.2, "completions/min_terminated_length": 83.2, "epoch": 0.4, "grad_norm": 0.000978235388174653, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 422423400.0, "reward": 0.7928371787071228, "reward_std": 0.09395273178815841, "rewards/accuracy_reward": 0.4919921875, "rewards/brier_reward": 0.764430582523346, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0035829836037009955, "rewards/frontier_coverage_1": 0.11412625126540661, "rewards/frontier_coverage_10": 0.11412625126540661, "rewards/frontier_coverage_15": 0.11412625126540661, "rewards/frontier_coverage_20": 0.11412625126540661, "rewards/frontier_coverage_25": 0.10572034269571304, "rewards/frontier_coverage_5": 0.11412625126540661, "rewards/frontier_ece_reward": 0.007939350325614214, "rewards/frontier_entropy_batch_reward": -0.04892009943723678, "signal/accuracy_reward/centered_abs_mean": 0.1263916015625, "signal/accuracy_reward/group_bin_occupancy": 0.18515625, "signal/accuracy_reward/group_std_mean": 0.16802475452423096, "signal/accuracy_reward/group_zero_std_frac": 0.51875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06319580078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06319580078125, "signal/advantage_abs_mean": 0.07194100320339203, "signal/advantage_pre_scale_abs_mean": 0.07194100320339203, "signal/advantage_pre_scale_std": 0.11499589532613755, "signal/advantage_std": 0.11499589532613755, "signal/brier_reward/centered_abs_mean": 0.1625375419855118, "signal/brier_reward/group_bin_occupancy": 0.832421875, "signal/brier_reward/group_std_mean": 0.20592648088932036, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0091098016127944, "signal/brier_reward/weight": 0.056043879687786104, "signal/brier_reward/weighted_centered_abs_mean": 0.0091098016127944, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034305922221392394, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72109375, "signal/frontier_aurc_reward/group_std_mean": 0.005304851569235325, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.709443987347185e-05, "signal/frontier_aurc_reward/weight": 0.019556624069809913, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.709443987347185e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21515123248100282, "signal/frontier_coverage_1/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_1/group_std_mean": 0.276702755689621, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_1/weight": 0.01777251176536083, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_10/centered_abs_mean": 0.21515123248100282, "signal/frontier_coverage_10/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_10/group_std_mean": 0.276702755689621, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_10/weight": 0.01777251176536083, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_15/centered_abs_mean": 0.21515123248100282, "signal/frontier_coverage_15/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_15/group_std_mean": 0.276702755689621, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_15/weight": 0.01777251176536083, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_20/centered_abs_mean": 0.21515123248100282, "signal/frontier_coverage_20/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_20/group_std_mean": 0.276702755689621, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_20/weight": 0.01777251176536083, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_25/centered_abs_mean": 0.19758794605731964, "signal/frontier_coverage_25/group_bin_occupancy": 0.865625, "signal/frontier_coverage_25/group_std_mean": 0.25453805923461914, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035135370679199697, "signal/frontier_coverage_25/weight": 0.017782849073410035, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035135370679199697, "signal/frontier_coverage_5/centered_abs_mean": 0.21515123248100282, "signal/frontier_coverage_5/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_5/group_std_mean": 0.276702755689621, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_coverage_5/weight": 0.01777251176536083, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003823632560670376, "signal/frontier_ece_reward/centered_abs_mean": 0.013926656730473042, "signal/frontier_ece_reward/group_bin_occupancy": 0.7546875, "signal/frontier_ece_reward/group_std_mean": 0.01783502697944641, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018803888699039817, "signal/frontier_ece_reward/weight": 0.135024231672287, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018803888699039817, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07931768745183945, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.59609375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10206554159522056, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014510990865528584, "signal/frontier_entropy_batch_reward/weight": 0.18302985429763793, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014510990865528584, "step": 125 }, { "adaptive_ema/accuracy_reward": 0.3971258292856597, "adaptive_ema/brier_reward": 0.6381828016465828, "adaptive_ema/format_reward": 0.8974846743858107, "adaptive_ema/frontier_aurc_reward": 0.0971401174135465, "adaptive_ema/frontier_coverage_1": 0.18090152116493946, "adaptive_ema/frontier_coverage_10": 0.18090152116493946, "adaptive_ema/frontier_coverage_15": 0.18090152116493946, "adaptive_ema/frontier_coverage_20": 0.18090152116493946, "adaptive_ema/frontier_coverage_25": 0.1800799254664386, "adaptive_ema/frontier_coverage_5": 0.18090152116493946, "adaptive_ema/frontier_ece_reward": 0.1073283127693054, "adaptive_ema/frontier_entropy_batch_reward": -0.1952066626486843, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.055139760673046115, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01970335878431797, "adaptive_weight/frontier_coverage_1": 0.017875409871339797, "adaptive_weight/frontier_coverage_10": 0.017875409871339797, "adaptive_weight/frontier_coverage_15": 0.017875409871339797, "adaptive_weight/frontier_coverage_20": 0.017875409871339797, "adaptive_weight/frontier_coverage_25": 0.017893340811133383, "adaptive_weight/frontier_coverage_5": 0.017875409871339797, "adaptive_weight/frontier_ece_reward": 0.1360406458377838, "adaptive_weight/frontier_entropy_batch_reward": 0.18214584290981292, "calibration/aurc": 0.3447294166901377, "calibration/batch_distribution_entropy": 0.9525498814443025, "calibration/batch_entropy_100bins": 0.6685293253297835, "calibration/batch_entropy_10bins": 0.9525498814443025, "calibration/batch_entropy_50bins": 0.7720460736699484, "calibration/batch_uniqueness": 0.88769145687729, "calibration/buffer_distribution_entropy": 0.9674000537650531, "calibration/buffer_entropy_100bins": 0.704135251096097, "calibration/buffer_entropy_10bins": 0.9674000537650531, "calibration/buffer_entropy_50bins": 0.8024988319224787, "calibration/confidence_entropy": 0.5013468191546062, "calibration/coverage@0%": 0.004296875, "calibration/coverage@1%": 0.004296875, "calibration/coverage@10%": 0.022274798189823874, "calibration/coverage@15%": 0.03594667318982388, "calibration/coverage@20%": 0.062118548189823874, "calibration/coverage@25%": 0.20910821306262228, "calibration/coverage@30%": 0.32719927226027395, "calibration/coverage@5%": 0.004296875, "calibration/ece": 0.10505189502813113, "calibration/mean_confidence": 0.47522064044153617, "calibration/prompt_uniqueness": 0.7703621918899584, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1135.2, "completions/max_terminated_length": 513.0, "completions/mean_length": 191.45498046875, "completions/mean_terminated_length": 190.92953491210938, "completions/min_length": 73.4, "completions/min_terminated_length": 73.4, "epoch": 0.416, "grad_norm": 0.0009625152451917529, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 439265083.0, "reward": 0.7997597455978394, "reward_std": 0.0898437261581421, "rewards/accuracy_reward": 0.5013671875, "rewards/brier_reward": 0.7667541861534118, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0030779951717704534, "rewards/frontier_coverage_1": 0.10926591604948044, "rewards/frontier_coverage_10": 0.10926591604948044, "rewards/frontier_coverage_15": 0.10926591604948044, "rewards/frontier_coverage_20": 0.10926591604948044, "rewards/frontier_coverage_25": 0.10277043804526328, "rewards/frontier_coverage_5": 0.10926591604948044, "rewards/frontier_ece_reward": 0.007529846765100956, "rewards/frontier_entropy_batch_reward": -0.03042619377374649, "signal/accuracy_reward/centered_abs_mean": 0.1254150390625, "signal/accuracy_reward/group_bin_occupancy": 0.18203125, "signal/accuracy_reward/group_std_mean": 0.16262999475002288, "signal/accuracy_reward/group_zero_std_frac": 0.54375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06270751953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06270751953125, "signal/advantage_abs_mean": 0.0694778598845005, "signal/advantage_pre_scale_abs_mean": 0.0694778598845005, "signal/advantage_pre_scale_std": 0.11225654482841492, "signal/advantage_std": 0.11225654482841492, "signal/brier_reward/centered_abs_mean": 0.1590313732624054, "signal/brier_reward/group_bin_occupancy": 0.83984375, "signal/brier_reward/group_std_mean": 0.20204126536846162, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008768082037568093, "signal/brier_reward/weight": 0.055139760673046115, "signal/brier_reward/weighted_centered_abs_mean": 0.008768082037568093, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.003866990050300956, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.00288281855173409, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7328125, "signal/frontier_aurc_reward/group_std_mean": 0.004507267288863659, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.679482419509441e-05, "signal/frontier_aurc_reward/weight": 0.01970335878431797, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.679482419509441e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2235151559114456, "signal/frontier_coverage_1/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_1/group_std_mean": 0.28225297331809995, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_1/weight": 0.017875409871339797, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_10/centered_abs_mean": 0.2235151559114456, "signal/frontier_coverage_10/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_10/group_std_mean": 0.28225297331809995, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_10/weight": 0.017875409871339797, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_15/centered_abs_mean": 0.2235151559114456, "signal/frontier_coverage_15/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_15/group_std_mean": 0.28225297331809995, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_15/weight": 0.017875409871339797, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_20/centered_abs_mean": 0.2235151559114456, "signal/frontier_coverage_20/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_20/group_std_mean": 0.28225297331809995, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_20/weight": 0.017875409871339797, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_25/centered_abs_mean": 0.20612691044807435, "signal/frontier_coverage_25/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_25/group_std_mean": 0.26099815368652346, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036885889247059824, "signal/frontier_coverage_25/weight": 0.017893340811133383, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036885889247059824, "signal/frontier_coverage_5/centered_abs_mean": 0.2235151559114456, "signal/frontier_coverage_5/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_5/group_std_mean": 0.28225297331809995, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_coverage_5/weight": 0.017875409871339797, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003995717084035277, "signal/frontier_ece_reward/centered_abs_mean": 0.013178033754229546, "signal/frontier_ece_reward/group_bin_occupancy": 0.760546875, "signal/frontier_ece_reward/group_std_mean": 0.016739048436284064, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017928671557456255, "signal/frontier_ece_reward/weight": 0.1360406458377838, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017928671557456255, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06405057907104492, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.55703125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.0861910954117775, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011665340699255466, "signal/frontier_entropy_batch_reward/weight": 0.18214584290981292, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011665340699255466, "step": 130 }, { "adaptive_ema/accuracy_reward": 0.40389062378254226, "adaptive_ema/brier_reward": 0.6450117178895205, "adaptive_ema/format_reward": 0.9024889395514306, "adaptive_ema/frontier_aurc_reward": 0.09225001141692199, "adaptive_ema/frontier_coverage_1": 0.17700410985275458, "adaptive_ema/frontier_coverage_10": 0.17700410985275458, "adaptive_ema/frontier_coverage_15": 0.17700410985275458, "adaptive_ema/frontier_coverage_20": 0.17700410985275458, "adaptive_ema/frontier_coverage_25": 0.17592252013390136, "adaptive_ema/frontier_coverage_5": 0.17700410985275458, "adaptive_ema/frontier_ece_reward": 0.10250125354670095, "adaptive_ema/frontier_entropy_batch_reward": -0.18781069837660672, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.05418673381209373, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.019842178747057914, "adaptive_weight/frontier_coverage_1": 0.017989566549658775, "adaptive_weight/frontier_coverage_10": 0.017989566549658775, "adaptive_weight/frontier_coverage_15": 0.017989566549658775, "adaptive_weight/frontier_coverage_20": 0.017989566549658775, "adaptive_weight/frontier_coverage_25": 0.018013209104537964, "adaptive_weight/frontier_coverage_5": 0.017989566549658775, "adaptive_weight/frontier_ece_reward": 0.13699791133403777, "adaptive_weight/frontier_entropy_batch_reward": 0.1813121348619461, "calibration/aurc": 0.2798047773126514, "calibration/batch_distribution_entropy": 0.9594813797344054, "calibration/batch_entropy_100bins": 0.6832254986653468, "calibration/batch_entropy_10bins": 0.9594813797344054, "calibration/batch_entropy_50bins": 0.7874470601692688, "calibration/batch_uniqueness": 0.894732666015625, "calibration/buffer_distribution_entropy": 0.9692746712793632, "calibration/buffer_entropy_100bins": 0.7048346676881715, "calibration/buffer_entropy_10bins": 0.9692746712793632, "calibration/buffer_entropy_50bins": 0.8034103297172124, "calibration/confidence_entropy": 0.4639365779377324, "calibration/coverage@0%": 0.0046875, "calibration/coverage@1%": 0.0046875, "calibration/coverage@10%": 0.14921875, "calibration/coverage@15%": 0.215625, "calibration/coverage@20%": 0.282421875, "calibration/coverage@25%": 0.351953125, "calibration/coverage@30%": 0.516015625, "calibration/coverage@5%": 0.05, "calibration/ece": 0.117168359375, "calibration/mean_confidence": 0.496183203125, "calibration/prompt_uniqueness": 0.74169921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1146.8, "completions/max_terminated_length": 494.2, "completions/mean_length": 190.58916015625, "completions/mean_terminated_length": 190.19492797851564, "completions/min_length": 86.6, "completions/min_terminated_length": 86.6, "epoch": 0.432, "grad_norm": 0.0014595247339457273, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 456231052.0, "reward": 0.8136967062950134, "reward_std": 0.08533284813165665, "rewards/accuracy_reward": 0.53837890625, "rewards/brier_reward": 0.7841731548309326, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002618259796872735, "rewards/frontier_coverage_1": 0.10985348746180534, "rewards/frontier_coverage_10": 0.10985348746180534, "rewards/frontier_coverage_15": 0.10985348746180534, "rewards/frontier_coverage_20": 0.10985348746180534, "rewards/frontier_coverage_25": 0.10386107414960861, "rewards/frontier_coverage_5": 0.10985348746180534, "rewards/frontier_ece_reward": 0.00937036368995905, "rewards/frontier_entropy_batch_reward": -0.060450931265950206, "signal/accuracy_reward/centered_abs_mean": 0.110687255859375, "signal/accuracy_reward/group_bin_occupancy": 0.1796875, "signal/accuracy_reward/group_std_mean": 0.14950263351202012, "signal/accuracy_reward/group_zero_std_frac": 0.5625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0553436279296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0553436279296875, "signal/advantage_abs_mean": 0.06546112969517708, "signal/advantage_pre_scale_abs_mean": 0.06546112969517708, "signal/advantage_pre_scale_std": 0.10559684187173843, "signal/advantage_std": 0.10559684187173843, "signal/brier_reward/centered_abs_mean": 0.15093303322792054, "signal/brier_reward/group_bin_occupancy": 0.82421875, "signal/brier_reward/group_std_mean": 0.1923585206270218, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008179661072790622, "signal/brier_reward/weight": 0.05418673381209373, "signal/brier_reward/weighted_centered_abs_mean": 0.008179661072790622, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002759553166106343, "signal/frontier_aurc_reward/group_bin_occupancy": 0.719921875, "signal/frontier_aurc_reward/group_std_mean": 0.004286598227918148, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.476091318996623e-05, "signal/frontier_aurc_reward/weight": 0.019842178747057914, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.476091318996623e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20907386541366577, "signal/frontier_coverage_1/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_1/group_std_mean": 0.2689059257507324, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_1/weight": 0.017989566549658775, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_10/centered_abs_mean": 0.20907386541366577, "signal/frontier_coverage_10/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_10/group_std_mean": 0.2689059257507324, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_10/weight": 0.017989566549658775, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_15/centered_abs_mean": 0.20907386541366577, "signal/frontier_coverage_15/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_15/group_std_mean": 0.2689059257507324, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_15/weight": 0.017989566549658775, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_20/centered_abs_mean": 0.20907386541366577, "signal/frontier_coverage_20/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_20/group_std_mean": 0.2689059257507324, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_20/weight": 0.017989566549658775, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_25/centered_abs_mean": 0.19255250692367554, "signal/frontier_coverage_25/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_25/group_std_mean": 0.2472353994846344, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003468307899311185, "signal/frontier_coverage_25/weight": 0.018013209104537964, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003468307899311185, "signal/frontier_coverage_5/centered_abs_mean": 0.20907386541366577, "signal/frontier_coverage_5/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_5/group_std_mean": 0.2689059257507324, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_coverage_5/weight": 0.017989566549658775, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037610134109854697, "signal/frontier_ece_reward/centered_abs_mean": 0.01300532352179289, "signal/frontier_ece_reward/group_bin_occupancy": 0.746875, "signal/frontier_ece_reward/group_std_mean": 0.016403328627347946, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017816227162256836, "signal/frontier_ece_reward/weight": 0.13699791133403777, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017816227162256836, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09048456139862537, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.55234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.11550155356526375, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01640178356319666, "signal/frontier_entropy_batch_reward/weight": 0.1813121348619461, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01640178356319666, "step": 135 }, { "adaptive_ema/accuracy_reward": 0.40901087552425236, "adaptive_ema/brier_reward": 0.6516210816848998, "adaptive_ema/format_reward": 0.907253467880264, "adaptive_ema/frontier_aurc_reward": 0.08757274590309962, "adaptive_ema/frontier_coverage_1": 0.17444973559060695, "adaptive_ema/frontier_coverage_10": 0.17444973559060695, "adaptive_ema/frontier_coverage_15": 0.17444973559060695, "adaptive_ema/frontier_coverage_20": 0.17444973559060695, "adaptive_ema/frontier_coverage_25": 0.1731865165107393, "adaptive_ema/frontier_coverage_5": 0.17444973559060695, "adaptive_ema/frontier_ece_reward": 0.09789230437137501, "adaptive_ema/frontier_entropy_batch_reward": -0.18140109903372353, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.05326752662658692, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.01997806802392006, "adaptive_weight/frontier_coverage_1": 0.01807584725320339, "adaptive_weight/frontier_coverage_10": 0.01807584725320339, "adaptive_weight/frontier_coverage_15": 0.01807584725320339, "adaptive_weight/frontier_coverage_20": 0.01807584725320339, "adaptive_weight/frontier_coverage_25": 0.01810350678861141, "adaptive_weight/frontier_coverage_5": 0.01807584725320339, "adaptive_weight/frontier_ece_reward": 0.13793377280235292, "adaptive_weight/frontier_entropy_batch_reward": 0.18063787817955018, "calibration/aurc": 0.2962632427860098, "calibration/batch_distribution_entropy": 0.9570370964821228, "calibration/batch_entropy_100bins": 0.6842356187694494, "calibration/batch_entropy_10bins": 0.9570370964821228, "calibration/batch_entropy_50bins": 0.7894500856200997, "calibration/batch_uniqueness": 0.8935943603515625, "calibration/buffer_distribution_entropy": 0.9709373362653174, "calibration/buffer_entropy_100bins": 0.7063763335396203, "calibration/buffer_entropy_10bins": 0.9709373362653174, "calibration/buffer_entropy_50bins": 0.8050762497775716, "calibration/confidence_entropy": 0.4888197990694086, "calibration/coverage@0%": 0.00546875, "calibration/coverage@1%": 0.00546875, "calibration/coverage@10%": 0.07578125, "calibration/coverage@15%": 0.109375, "calibration/coverage@20%": 0.181640625, "calibration/coverage@25%": 0.293359375, "calibration/coverage@30%": 0.449609375, "calibration/coverage@5%": 0.0265625, "calibration/ece": 0.144036328125, "calibration/mean_confidence": 0.529510546875, "calibration/prompt_uniqueness": 0.774169921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 769.4, "completions/max_terminated_length": 656.2, "completions/mean_length": 197.3052734375, "completions/mean_terminated_length": 196.52201232910156, "completions/min_length": 89.8, "completions/min_terminated_length": 89.8, "epoch": 0.448, "grad_norm": 0.0010398230515420437, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 473204258.0, "reward": 0.802183473110199, "reward_std": 0.08431367427110673, "rewards/accuracy_reward": 0.50546875, "rewards/brier_reward": 0.7795440316200256, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0031814999878406526, "rewards/frontier_coverage_1": 0.12466348260641098, "rewards/frontier_coverage_10": 0.12466348260641098, "rewards/frontier_coverage_15": 0.12466348260641098, "rewards/frontier_coverage_20": 0.12466348260641098, "rewards/frontier_coverage_25": 0.12040752172470093, "rewards/frontier_coverage_5": 0.12466348260641098, "rewards/frontier_ece_reward": 0.008025220409035683, "rewards/frontier_entropy_batch_reward": -0.03533042259514332, "signal/accuracy_reward/centered_abs_mean": 0.112109375, "signal/accuracy_reward/group_bin_occupancy": 0.17734375, "signal/accuracy_reward/group_std_mean": 0.14760722517967223, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0560546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0560546875, "signal/advantage_abs_mean": 0.06522256284952163, "signal/advantage_pre_scale_abs_mean": 0.06522256284952163, "signal/advantage_pre_scale_std": 0.10748258531093598, "signal/advantage_std": 0.10748258531093598, "signal/brier_reward/centered_abs_mean": 0.15731483101844787, "signal/brier_reward/group_bin_occupancy": 0.836328125, "signal/brier_reward/group_std_mean": 0.19994543492794037, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008379863202571869, "signal/brier_reward/weight": 0.05326752662658692, "signal/brier_reward/weighted_centered_abs_mean": 0.008379863202571869, "signal/format_reward/centered_abs_mean": 0.00101318359375, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0017052460461854935, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000506591796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000506591796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003091309033334255, "signal/frontier_aurc_reward/group_bin_occupancy": 0.702734375, "signal/frontier_aurc_reward/group_std_mean": 0.0049549748189747335, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.173901856527663e-05, "signal/frontier_aurc_reward/weight": 0.01997806802392006, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.173901856527663e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21405775845050812, "signal/frontier_coverage_1/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_1/group_std_mean": 0.27364026606082914, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_1/weight": 0.01807584725320339, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_10/centered_abs_mean": 0.21405775845050812, "signal/frontier_coverage_10/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_10/group_std_mean": 0.27364026606082914, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_10/weight": 0.01807584725320339, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_15/centered_abs_mean": 0.21405775845050812, "signal/frontier_coverage_15/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_15/group_std_mean": 0.27364026606082914, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_15/weight": 0.01807584725320339, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_20/centered_abs_mean": 0.21405775845050812, "signal/frontier_coverage_20/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_20/group_std_mean": 0.27364026606082914, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_20/weight": 0.01807584725320339, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_25/centered_abs_mean": 0.19044365584850312, "signal/frontier_coverage_25/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_25/group_std_mean": 0.24385970830917358, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034478874877095222, "signal/frontier_coverage_25/weight": 0.01810350678861141, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034478874877095222, "signal/frontier_coverage_5/centered_abs_mean": 0.21405775845050812, "signal/frontier_coverage_5/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_5/group_std_mean": 0.27364026606082914, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_coverage_5/weight": 0.01807584725320339, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038695004768669605, "signal/frontier_ece_reward/centered_abs_mean": 0.01259579136967659, "signal/frontier_ece_reward/group_bin_occupancy": 0.763671875, "signal/frontier_ece_reward/group_std_mean": 0.015973252430558204, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017373360227793456, "signal/frontier_ece_reward/weight": 0.13793377280235292, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017373360227793456, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0651637777686119, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.58046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.08384880423545837, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011776900757104159, "signal/frontier_entropy_batch_reward/weight": 0.18063787817955018, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011776900757104159, "step": 140 }, { "adaptive_ema/accuracy_reward": 0.41331912438572094, "adaptive_ema/brier_reward": 0.6573604978214818, "adaptive_ema/format_reward": 0.9117732413333242, "adaptive_ema/frontier_aurc_reward": 0.08311509842616054, "adaptive_ema/frontier_coverage_1": 0.17176355954378422, "adaptive_ema/frontier_coverage_10": 0.17176355954378422, "adaptive_ema/frontier_coverage_15": 0.17176355954378422, "adaptive_ema/frontier_coverage_20": 0.17176355954378422, "adaptive_ema/frontier_coverage_25": 0.1701984387803478, "adaptive_ema/frontier_coverage_5": 0.17176355954378422, "adaptive_ema/frontier_ece_reward": 0.0934477810133228, "adaptive_ema/frontier_entropy_batch_reward": -0.17472138935692122, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.05246995091438293, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.020106296986341476, "adaptive_weight/frontier_coverage_1": 0.018162321671843527, "adaptive_weight/frontier_coverage_10": 0.018162321671843527, "adaptive_weight/frontier_coverage_15": 0.018162321671843527, "adaptive_weight/frontier_coverage_20": 0.018162321671843527, "adaptive_weight/frontier_coverage_25": 0.0181966457515955, "adaptive_weight/frontier_coverage_5": 0.018162321671843527, "adaptive_weight/frontier_ece_reward": 0.1388248085975647, "adaptive_weight/frontier_entropy_batch_reward": 0.1798906832933426, "calibration/aurc": 0.4353696800721244, "calibration/batch_distribution_entropy": 0.9603827125826234, "calibration/batch_entropy_100bins": 0.6827533765788634, "calibration/batch_entropy_10bins": 0.9603827125826234, "calibration/batch_entropy_50bins": 0.7879613603432448, "calibration/batch_uniqueness": 0.8935943603515625, "calibration/buffer_distribution_entropy": 0.9722251449187385, "calibration/buffer_entropy_100bins": 0.707843130215015, "calibration/buffer_entropy_10bins": 0.9722251449187385, "calibration/buffer_entropy_50bins": 0.8066064119008413, "calibration/confidence_entropy": 0.48914448804111155, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.00234375, "calibration/coverage@15%": 0.02890625, "calibration/coverage@20%": 0.038671875, "calibration/coverage@25%": 0.140234375, "calibration/coverage@30%": 0.254296875, "calibration/coverage@5%": 0.00234375, "calibration/ece": 0.14485286458333332, "calibration/mean_confidence": 0.4620638020833333, "calibration/prompt_uniqueness": 0.758447265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 886.6, "completions/max_terminated_length": 469.4, "completions/mean_length": 198.75224609375, "completions/mean_terminated_length": 198.3603759765625, "completions/min_length": 85.8, "completions/min_terminated_length": 85.8, "epoch": 0.464, "grad_norm": 0.0009978722082450986, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 490410297.0, "reward": 0.7703938245773315, "reward_std": 0.08229008316993713, "rewards/accuracy_reward": 0.45380859375, "rewards/brier_reward": 0.7603809714317322, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0039552615489810705, "rewards/frontier_coverage_1": 0.14377658516168595, "rewards/frontier_coverage_10": 0.14377658516168595, "rewards/frontier_coverage_15": 0.14377658516168595, "rewards/frontier_coverage_20": 0.14377658516168595, "rewards/frontier_coverage_25": 0.12820575162768363, "rewards/frontier_coverage_5": 0.14377658516168595, "rewards/frontier_ece_reward": 0.006863876525312662, "rewards/frontier_entropy_batch_reward": -0.07018293291330338, "signal/accuracy_reward/centered_abs_mean": 0.094793701171875, "signal/accuracy_reward/group_bin_occupancy": 0.173828125, "signal/accuracy_reward/group_std_mean": 0.13010418564081191, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0473968505859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0473968505859375, "signal/advantage_abs_mean": 0.06305078566074371, "signal/advantage_pre_scale_abs_mean": 0.06305078566074371, "signal/advantage_pre_scale_std": 0.10240222066640854, "signal/advantage_std": 0.10240222066640854, "signal/brier_reward/centered_abs_mean": 0.15986269414424897, "signal/brier_reward/group_bin_occupancy": 0.845703125, "signal/brier_reward/group_std_mean": 0.20299112200737, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008388944528996944, "signal/brier_reward/weight": 0.05246995091438293, "signal/brier_reward/weighted_centered_abs_mean": 0.008388944528996944, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034380458761006593, "signal/frontier_aurc_reward/group_bin_occupancy": 0.691015625, "signal/frontier_aurc_reward/group_std_mean": 0.005427456274628639, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.912858807481826e-05, "signal/frontier_aurc_reward/weight": 0.020106296986341476, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.912858807481826e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20949412882328033, "signal/frontier_coverage_1/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_1/group_std_mean": 0.26894415616989137, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_1/weight": 0.018162321671843527, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_10/centered_abs_mean": 0.20949412882328033, "signal/frontier_coverage_10/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_10/group_std_mean": 0.26894415616989137, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_10/weight": 0.018162321671843527, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_15/centered_abs_mean": 0.20949412882328033, "signal/frontier_coverage_15/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_15/group_std_mean": 0.26894415616989137, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_15/weight": 0.018162321671843527, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_20/centered_abs_mean": 0.20949412882328033, "signal/frontier_coverage_20/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_20/group_std_mean": 0.26894415616989137, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_20/weight": 0.018162321671843527, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_25/centered_abs_mean": 0.1845701038837433, "signal/frontier_coverage_25/group_bin_occupancy": 0.871875, "signal/frontier_coverage_25/group_std_mean": 0.2373584806919098, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00335861025378108, "signal/frontier_coverage_25/weight": 0.0181966457515955, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00335861025378108, "signal/frontier_coverage_5/centered_abs_mean": 0.20949412882328033, "signal/frontier_coverage_5/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_5/group_std_mean": 0.26894415616989137, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_coverage_5/weight": 0.018162321671843527, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003804934723302722, "signal/frontier_ece_reward/centered_abs_mean": 0.012350363284349441, "signal/frontier_ece_reward/group_bin_occupancy": 0.775390625, "signal/frontier_ece_reward/group_std_mean": 0.015785422176122665, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001714510377496481, "signal/frontier_ece_reward/weight": 0.1388248085975647, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001714510377496481, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10670888125896454, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.540234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.13525095582008362, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019193359836935996, "signal/frontier_entropy_batch_reward/weight": 0.1798906832933426, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019193359836935996, "step": 145 }, { "adaptive_ema/accuracy_reward": 0.4158360108171112, "adaptive_ema/brier_reward": 0.662379968983462, "adaptive_ema/format_reward": 0.9160760948369102, "adaptive_ema/frontier_aurc_reward": 0.07885424522936033, "adaptive_ema/frontier_coverage_1": 0.170121992197436, "adaptive_ema/frontier_coverage_10": 0.170121992197436, "adaptive_ema/frontier_coverage_15": 0.170121992197436, "adaptive_ema/frontier_coverage_20": 0.170121992197436, "adaptive_ema/frontier_coverage_25": 0.1679290612428575, "adaptive_ema/frontier_coverage_5": 0.170121992197436, "adaptive_ema/frontier_ece_reward": 0.0892171161257757, "adaptive_ema/frontier_entropy_batch_reward": -0.16944830561177496, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.051763904839754106, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02022419050335884, "adaptive_weight/frontier_coverage_1": 0.018220360577106475, "adaptive_weight/frontier_coverage_10": 0.018220360577106475, "adaptive_weight/frontier_coverage_15": 0.018220360577106475, "adaptive_weight/frontier_coverage_20": 0.018220360577106475, "adaptive_weight/frontier_coverage_25": 0.018268508464097978, "adaptive_weight/frontier_coverage_5": 0.018220360577106475, "adaptive_weight/frontier_ece_reward": 0.13964154720306396, "adaptive_weight/frontier_entropy_batch_reward": 0.17930004000663757, "calibration/aurc": 0.30436075115770805, "calibration/batch_distribution_entropy": 0.9680437886073019, "calibration/batch_entropy_100bins": 0.6929306685782016, "calibration/batch_entropy_10bins": 0.9680437886073019, "calibration/batch_entropy_50bins": 0.7983824179166977, "calibration/batch_uniqueness": 0.8958013507292947, "calibration/buffer_distribution_entropy": 0.9731618760742794, "calibration/buffer_entropy_100bins": 0.7091474549089483, "calibration/buffer_entropy_10bins": 0.9731618760742794, "calibration/buffer_entropy_50bins": 0.8079997262518148, "calibration/confidence_entropy": 0.46800119826295006, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.042578125, "calibration/coverage@15%": 0.13203125, "calibration/coverage@20%": 0.33750458659491195, "calibration/coverage@25%": 0.43634647137964777, "calibration/coverage@30%": 0.4824532167318982, "calibration/coverage@5%": 0.0140625, "calibration/ece": 0.15203426183217936, "calibration/mean_confidence": 0.478522489635531, "calibration/prompt_uniqueness": 0.7592241459905047, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 926.2, "completions/max_terminated_length": 550.8, "completions/mean_length": 199.7640625, "completions/mean_terminated_length": 199.37288513183594, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.48, "grad_norm": 0.0010686474852263927, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 507503913.0, "reward": 0.7970785617828369, "reward_std": 0.0882499486207962, "rewards/accuracy_reward": 0.5107421875, "rewards/brier_reward": 0.7666718006134033, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0033504603896290066, "rewards/frontier_coverage_1": 0.11392012536525727, "rewards/frontier_coverage_10": 0.11392012536525727, "rewards/frontier_coverage_15": 0.11392012536525727, "rewards/frontier_coverage_20": 0.11392012536525727, "rewards/frontier_coverage_25": 0.10631907731294632, "rewards/frontier_coverage_5": 0.11392012536525727, "rewards/frontier_ece_reward": 0.007587762642651796, "rewards/frontier_entropy_batch_reward": -0.06213836595416069, "signal/accuracy_reward/centered_abs_mean": 0.11937255859375, "signal/accuracy_reward/group_bin_occupancy": 0.182421875, "signal/accuracy_reward/group_std_mean": 0.15820908844470977, "signal/accuracy_reward/group_zero_std_frac": 0.540625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059686279296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.059686279296875, "signal/advantage_abs_mean": 0.067903071641922, "signal/advantage_pre_scale_abs_mean": 0.067903071641922, "signal/advantage_pre_scale_std": 0.10903761386871338, "signal/advantage_std": 0.10903761386871338, "signal/brier_reward/centered_abs_mean": 0.1605544239282608, "signal/brier_reward/group_bin_occupancy": 0.81796875, "signal/brier_reward/group_std_mean": 0.204882350564003, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008311640471220016, "signal/brier_reward/weight": 0.051763904839754106, "signal/brier_reward/weighted_centered_abs_mean": 0.008311640471220016, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032434612046927215, "signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375, "signal/frontier_aurc_reward/group_std_mean": 0.005121718998998404, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.559158791787922e-05, "signal/frontier_aurc_reward/weight": 0.02022419050335884, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.559158791787922e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22300857603549956, "signal/frontier_coverage_1/group_bin_occupancy": 0.842578125, "signal/frontier_coverage_1/group_std_mean": 0.2856623888015747, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_1/weight": 0.018220360577106475, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_10/centered_abs_mean": 0.22300857603549956, "signal/frontier_coverage_10/group_bin_occupancy": 0.842578125, "signal/frontier_coverage_10/group_std_mean": 0.2856623888015747, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_10/weight": 0.018220360577106475, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_15/centered_abs_mean": 0.22300857603549956, "signal/frontier_coverage_15/group_bin_occupancy": 0.842578125, "signal/frontier_coverage_15/group_std_mean": 0.2856623888015747, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_15/weight": 0.018220360577106475, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_20/centered_abs_mean": 0.22300857603549956, "signal/frontier_coverage_20/group_bin_occupancy": 0.842578125, "signal/frontier_coverage_20/group_std_mean": 0.2856623888015747, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_20/weight": 0.018220360577106475, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_25/centered_abs_mean": 0.19893572628498077, "signal/frontier_coverage_25/group_bin_occupancy": 0.837109375, "signal/frontier_coverage_25/group_std_mean": 0.25566576421260834, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003634166205301881, "signal/frontier_coverage_25/weight": 0.018268508464097978, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003634166205301881, "signal/frontier_coverage_5/centered_abs_mean": 0.22300857603549956, "signal/frontier_coverage_5/group_bin_occupancy": 0.842578125, "signal/frontier_coverage_5/group_std_mean": 0.2856623888015747, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_coverage_5/weight": 0.018220360577106475, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004063255805522204, "signal/frontier_ece_reward/centered_abs_mean": 0.01263709794729948, "signal/frontier_ece_reward/group_bin_occupancy": 0.763671875, "signal/frontier_ece_reward/group_std_mean": 0.016002984531223775, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017646895488724113, "signal/frontier_ece_reward/weight": 0.13964154720306396, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017646895488724113, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08762557096779347, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.11139658465981483, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015710269659757616, "signal/frontier_entropy_batch_reward/weight": 0.17930004000663757, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015710269659757616, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.5294038132117689, "eval_calibration/batch_distribution_entropy": 0.9063057403122545, "eval_calibration/batch_entropy_100bins": 0.565109525945698, "eval_calibration/batch_entropy_10bins": 0.9063057403122545, "eval_calibration/batch_entropy_50bins": 0.6576108922402973, "eval_calibration/batch_uniqueness": 0.82421875, "eval_calibration/buffer_distribution_entropy": 0.9738875723935155, "eval_calibration/buffer_entropy_100bins": 0.7100046469801263, "eval_calibration/buffer_entropy_10bins": 0.9738875723935155, "eval_calibration/buffer_entropy_50bins": 0.8088772762097054, "eval_calibration/confidence_entropy": 0.455809067666528, "eval_calibration/coverage@0%": 0.015625, "eval_calibration/coverage@1%": 0.015625, "eval_calibration/coverage@10%": 0.015625, "eval_calibration/coverage@15%": 0.015625, "eval_calibration/coverage@20%": 0.0625, "eval_calibration/coverage@25%": 0.0859375, "eval_calibration/coverage@30%": 0.09375, "eval_calibration/coverage@5%": 0.015625, "eval_calibration/ece": 0.191640625, "eval_calibration/mean_confidence": 0.423828125, "eval_calibration/prompt_uniqueness": 0.82421875, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 342.25, "eval_completions/max_terminated_length": 342.25, "eval_completions/mean_length": 201.73026657104492, "eval_completions/mean_terminated_length": 201.73026657104492, "eval_completions/min_length": 102.25, "eval_completions/min_terminated_length": 102.25, "eval_loss": 0.0, "eval_num_tokens": 507503913.0, "eval_reward": 0.7298808097839355, "eval_reward_std": 0.2223692536354065, "eval_rewards/accuracy_reward": 0.4140625, "eval_rewards/brier_reward": 0.7809019684791565, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0038445236277766526, "eval_rewards/frontier_coverage_1": 0.1827796958386898, "eval_rewards/frontier_coverage_10": 0.1827796958386898, "eval_rewards/frontier_coverage_15": 0.1827796958386898, "eval_rewards/frontier_coverage_20": 0.1827796958386898, "eval_rewards/frontier_coverage_25": 0.14920702949166298, "eval_rewards/frontier_coverage_5": 0.1827796958386898, "eval_rewards/frontier_ece_reward": 0.00868905265815556, "eval_rewards/frontier_entropy_batch_reward": -0.2114410400390625, "eval_runtime": 19.1534, "eval_samples_per_second": 26.105, "eval_signal/accuracy_reward/centered_abs_mean": 0.465087890625, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4892200380563736, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2325439453125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2325439453125, "eval_signal/advantage_abs_mean": 0.1968984156847, "eval_signal/advantage_pre_scale_abs_mean": 0.1968984156847, "eval_signal/advantage_pre_scale_std": 0.22023877874016762, "eval_signal/advantage_std": 0.22023877874016762, "eval_signal/brier_reward/centered_abs_mean": 0.1967528983950615, "eval_signal/brier_reward/group_bin_occupancy": 0.875, "eval_signal/brier_reward/group_std_mean": 0.25220654532313347, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010119047947227955, "eval_signal/brier_reward/weight": 0.05143023654818535, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.010119047947227955, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004891375545412302, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.734375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008528004633262753, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.914291149470955e-05, "eval_signal/frontier_aurc_reward/weight": 0.020268922671675682, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.914291149470955e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3524972200393677, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_1/group_std_mean": 0.4465137869119644, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_1/weight": 0.018254097551107407, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3524972200393677, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_10/group_std_mean": 0.4465137869119644, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_10/weight": 0.018254097551107407, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3524972200393677, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_15/group_std_mean": 0.4465137869119644, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_15/weight": 0.018254097551107407, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3524972200393677, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_20/group_std_mean": 0.4465137869119644, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_20/weight": 0.018254097551107407, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.29581238329410553, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9140625, "eval_signal/frontier_coverage_25/group_std_mean": 0.37883658707141876, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005414659972302616, "eval_signal/frontier_coverage_25/weight": 0.0183043722063303, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005414659972302616, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3524972200393677, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_5/group_std_mean": 0.4465137869119644, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_coverage_5/weight": 0.018254097551107407, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006434518378227949, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.015601360471919179, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9375, "eval_signal/frontier_ece_reward/group_std_mean": 0.019876172300428152, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021834074286744, "eval_signal/frontier_ece_reward/weight": 0.13994981348514557, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021834074286744, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3039870262145996, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5390625, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3767779543995857, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05443682614713907, "eval_signal/frontier_entropy_batch_reward/weight": 0.1790761500597, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05443682614713907, "eval_steps_per_second": 0.209, "step": 150 }, { "adaptive_ema/accuracy_reward": 0.4216893837907655, "adaptive_ema/brier_reward": 0.6677967837015515, "adaptive_ema/format_reward": 0.9201758669871953, "adaptive_ema/frontier_aurc_reward": 0.07483301245139518, "adaptive_ema/frontier_coverage_1": 0.16652975308326617, "adaptive_ema/frontier_coverage_10": 0.16652975308326617, "adaptive_ema/frontier_coverage_15": 0.16652975308326617, "adaptive_ema/frontier_coverage_20": 0.16652975308326617, "adaptive_ema/frontier_coverage_25": 0.1637826024910254, "adaptive_ema/frontier_coverage_5": 0.16652975308326617, "adaptive_ema/frontier_ece_reward": 0.08521242073109674, "adaptive_ema/frontier_entropy_batch_reward": -0.1634185454626772, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.05099123567342758, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02033553533256054, "adaptive_weight/frontier_coverage_1": 0.018320006504654884, "adaptive_weight/frontier_coverage_10": 0.018320006504654884, "adaptive_weight/frontier_coverage_15": 0.018320006504654884, "adaptive_weight/frontier_coverage_20": 0.018320006504654884, "adaptive_weight/frontier_coverage_25": 0.01838039085268974, "adaptive_weight/frontier_coverage_5": 0.018320006504654884, "adaptive_weight/frontier_ece_reward": 0.14041475653648378, "adaptive_weight/frontier_entropy_batch_reward": 0.17857804894447327, "calibration/aurc": 0.41690815238153345, "calibration/batch_distribution_entropy": 0.9677657457156361, "calibration/batch_entropy_100bins": 0.6856321179224288, "calibration/batch_entropy_10bins": 0.9677657457156361, "calibration/batch_entropy_50bins": 0.7908586413950005, "calibration/batch_uniqueness": 0.8968597412109375, "calibration/buffer_distribution_entropy": 0.9741904402212562, "calibration/buffer_entropy_100bins": 0.7102141481794881, "calibration/buffer_entropy_10bins": 0.9741904402212562, "calibration/buffer_entropy_50bins": 0.8090575669865515, "calibration/confidence_entropy": 0.4929868712618986, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.05390625, "calibration/coverage@15%": 0.116015625, "calibration/coverage@20%": 0.139453125, "calibration/coverage@25%": 0.17890625, "calibration/coverage@30%": 0.24375, "calibration/coverage@5%": 0.01875, "calibration/ece": 0.14474409101562496, "calibration/mean_confidence": 0.505406690234375, "calibration/prompt_uniqueness": 0.767236328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 926.4, "completions/max_terminated_length": 488.4, "completions/mean_length": 203.50927734375, "completions/mean_terminated_length": 203.1182373046875, "completions/min_length": 93.2, "completions/min_terminated_length": 93.2, "epoch": 0.496, "grad_norm": 0.0009186447714455426, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 524895688.0, "reward": 0.8125284075736999, "reward_std": 0.08393937200307847, "rewards/accuracy_reward": 0.53896484375, "rewards/brier_reward": 0.7696381568908691, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003218067158013582, "rewards/frontier_coverage_1": 0.08215026259422302, "rewards/frontier_coverage_10": 0.08215026259422302, "rewards/frontier_coverage_15": 0.08215026259422302, "rewards/frontier_coverage_20": 0.08215026259422302, "rewards/frontier_coverage_25": 0.06435679569840431, "rewards/frontier_coverage_5": 0.08215026259422302, "rewards/frontier_ece_reward": 0.006951355841010809, "rewards/frontier_entropy_batch_reward": -0.03228703960776329, "signal/accuracy_reward/centered_abs_mean": 0.106158447265625, "signal/accuracy_reward/group_bin_occupancy": 0.17421875, "signal/accuracy_reward/group_std_mean": 0.14007158279418946, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0530792236328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0530792236328125, "signal/advantage_abs_mean": 0.06463491767644883, "signal/advantage_pre_scale_abs_mean": 0.06463491767644883, "signal/advantage_pre_scale_std": 0.10683177411556244, "signal/advantage_std": 0.10683177411556244, "signal/brier_reward/centered_abs_mean": 0.1546708345413208, "signal/brier_reward/group_bin_occupancy": 0.834765625, "signal/brier_reward/group_std_mean": 0.19565778970718384, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0078872368671, "signal/brier_reward/weight": 0.05099123567342758, "signal/brier_reward/weighted_centered_abs_mean": 0.0078872368671, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003242273861542344, "signal/frontier_aurc_reward/group_bin_occupancy": 0.712109375, "signal/frontier_aurc_reward/group_std_mean": 0.005140113364905119, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.592829740839078e-05, "signal/frontier_aurc_reward/weight": 0.02033553533256054, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.592829740839078e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19634953141212463, "signal/frontier_coverage_1/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_1/group_std_mean": 0.2511405676603317, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_1/weight": 0.018320006504654884, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_10/centered_abs_mean": 0.19634953141212463, "signal/frontier_coverage_10/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_10/group_std_mean": 0.2511405676603317, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_10/weight": 0.018320006504654884, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_15/centered_abs_mean": 0.19634953141212463, "signal/frontier_coverage_15/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_15/group_std_mean": 0.2511405676603317, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_15/weight": 0.018320006504654884, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_20/centered_abs_mean": 0.19634953141212463, "signal/frontier_coverage_20/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_20/group_std_mean": 0.2511405676603317, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_20/weight": 0.018320006504654884, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_25/centered_abs_mean": 0.158232381939888, "signal/frontier_coverage_25/group_bin_occupancy": 0.84140625, "signal/frontier_coverage_25/group_std_mean": 0.20365700125694275, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002908079931512475, "signal/frontier_coverage_25/weight": 0.01838039085268974, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002908079931512475, "signal/frontier_coverage_5/centered_abs_mean": 0.19634953141212463, "signal/frontier_coverage_5/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_5/group_std_mean": 0.2511405676603317, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_coverage_5/weight": 0.018320006504654884, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035970427095890047, "signal/frontier_ece_reward/centered_abs_mean": 0.011543996259570121, "signal/frontier_ece_reward/group_bin_occupancy": 0.771875, "signal/frontier_ece_reward/group_std_mean": 0.014726097695529461, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016208942281082273, "signal/frontier_ece_reward/weight": 0.14041475653648378, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016208942281082273, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06659752577543258, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.590234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.08772371038794517, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011892184615135193, "signal/frontier_entropy_batch_reward/weight": 0.17857804894447327, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011892184615135193, "step": 155 }, { "adaptive_ema/accuracy_reward": 0.42775765798098997, "adaptive_ema/brier_reward": 0.6731288846155358, "adaptive_ema/format_reward": 0.9240622182023328, "adaptive_ema/frontier_aurc_reward": 0.0710203393077371, "adaptive_ema/frontier_coverage_1": 0.1625095130501547, "adaptive_ema/frontier_coverage_10": 0.1625095130501547, "adaptive_ema/frontier_coverage_15": 0.1625095130501547, "adaptive_ema/frontier_coverage_20": 0.16245797037544615, "adaptive_ema/frontier_coverage_25": 0.1589048029890912, "adaptive_ema/frontier_coverage_5": 0.1625095130501547, "adaptive_ema/frontier_ece_reward": 0.08139468145157765, "adaptive_ema/frontier_entropy_batch_reward": -0.1572926740027641, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.050226838886737825, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.020441357418894766, "adaptive_weight/frontier_coverage_1": 0.018428217619657516, "adaptive_weight/frontier_coverage_10": 0.018428217619657516, "adaptive_weight/frontier_coverage_15": 0.018428217619657516, "adaptive_weight/frontier_coverage_20": 0.018429351970553398, "adaptive_weight/frontier_coverage_25": 0.018507539853453638, "adaptive_weight/frontier_coverage_5": 0.018428217619657516, "adaptive_weight/frontier_ece_reward": 0.1411527931690216, "adaptive_weight/frontier_entropy_batch_reward": 0.17782924175262452, "calibration/aurc": 0.3364546318335065, "calibration/batch_distribution_entropy": 0.9636604192444294, "calibration/batch_entropy_100bins": 0.6906049356265781, "calibration/batch_entropy_10bins": 0.9636604192444294, "calibration/batch_entropy_50bins": 0.7936943544550781, "calibration/batch_uniqueness": 0.8961584303052467, "calibration/buffer_distribution_entropy": 0.9748793949875173, "calibration/buffer_entropy_100bins": 0.7105535912341152, "calibration/buffer_entropy_10bins": 0.9748793949875173, "calibration/buffer_entropy_50bins": 0.8093978297658875, "calibration/confidence_entropy": 0.49873162920203085, "calibration/coverage@0%": 0.010546875, "calibration/coverage@1%": 0.010546875, "calibration/coverage@10%": 0.126953125, "calibration/coverage@15%": 0.203515625, "calibration/coverage@20%": 0.3008393468688845, "calibration/coverage@25%": 0.3864450831702544, "calibration/coverage@30%": 0.45917089652641874, "calibration/coverage@5%": 0.055859375, "calibration/ece": 0.13006195507815585, "calibration/mean_confidence": 0.4916528810275193, "calibration/prompt_uniqueness": 0.77803577816077, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 927.6, "completions/max_terminated_length": 495.2, "completions/mean_length": 202.1953125, "completions/mean_terminated_length": 201.80372009277343, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.512, "grad_norm": 0.0009540948085486889, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 542111832.0, "reward": 0.8123380541801453, "reward_std": 0.08555371910333634, "rewards/accuracy_reward": 0.53466796875, "rewards/brier_reward": 0.7846086502075196, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002941929781809449, "rewards/frontier_coverage_1": 0.10255831480026245, "rewards/frontier_coverage_10": 0.10255831480026245, "rewards/frontier_coverage_15": 0.10255831480026245, "rewards/frontier_coverage_20": 0.10150663703680038, "rewards/frontier_coverage_25": 0.07860263586044311, "rewards/frontier_coverage_5": 0.10255831480026245, "rewards/frontier_ece_reward": 0.007728977501392365, "rewards/frontier_entropy_batch_reward": -0.035049394518136975, "signal/accuracy_reward/centered_abs_mean": 0.111737060546875, "signal/accuracy_reward/group_bin_occupancy": 0.178515625, "signal/accuracy_reward/group_std_mean": 0.14923475980758666, "signal/accuracy_reward/group_zero_std_frac": 0.571875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0558685302734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0558685302734375, "signal/advantage_abs_mean": 0.0649673268198967, "signal/advantage_pre_scale_abs_mean": 0.0649673268198967, "signal/advantage_pre_scale_std": 0.10899066478013993, "signal/advantage_std": 0.10899066478013993, "signal/brier_reward/centered_abs_mean": 0.14757270514965057, "signal/brier_reward/group_bin_occupancy": 0.832421875, "signal/brier_reward/group_std_mean": 0.19025132954120635, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.007413102779537439, "signal/brier_reward/weight": 0.050226838886737825, "signal/brier_reward/weighted_centered_abs_mean": 0.007413102779537439, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032049688510596753, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6890625, "signal/frontier_aurc_reward/group_std_mean": 0.005401439126580953, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.551606275024824e-05, "signal/frontier_aurc_reward/weight": 0.020441357418894766, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.551606275024824e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18966628313064576, "signal/frontier_coverage_1/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_1/group_std_mean": 0.2492189884185791, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003495160723105073, "signal/frontier_coverage_1/weight": 0.018428217619657516, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003495160723105073, "signal/frontier_coverage_10/centered_abs_mean": 0.18966628313064576, "signal/frontier_coverage_10/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_10/group_std_mean": 0.2492189884185791, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003495160723105073, "signal/frontier_coverage_10/weight": 0.018428217619657516, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003495160723105073, "signal/frontier_coverage_15/centered_abs_mean": 0.18966628313064576, "signal/frontier_coverage_15/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_15/group_std_mean": 0.2492189884185791, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003495160723105073, "signal/frontier_coverage_15/weight": 0.018428217619657516, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003495160723105073, "signal/frontier_coverage_20/centered_abs_mean": 0.1885404407978058, "signal/frontier_coverage_20/group_bin_occupancy": 0.8625, "signal/frontier_coverage_20/group_std_mean": 0.2477384924888611, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034746643155813216, "signal/frontier_coverage_20/weight": 0.018429351970553398, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034746643155813216, "signal/frontier_coverage_25/centered_abs_mean": 0.1385777235031128, "signal/frontier_coverage_25/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_25/group_std_mean": 0.1832002341747284, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002564464882016182, "signal/frontier_coverage_25/weight": 0.018507539853453638, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002564464882016182, "signal/frontier_coverage_5/centered_abs_mean": 0.18966628313064576, "signal/frontier_coverage_5/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_5/group_std_mean": 0.2492189884185791, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003495160723105073, "signal/frontier_coverage_5/weight": 0.018428217619657516, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003495160723105073, "signal/frontier_ece_reward/centered_abs_mean": 0.011046069860458373, "signal/frontier_ece_reward/group_bin_occupancy": 0.776953125, "signal/frontier_ece_reward/group_std_mean": 0.014163880608975887, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015591410920023918, "signal/frontier_ece_reward/weight": 0.1411527931690216, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015591410920023918, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06716678887605668, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.61640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.08615255355834961, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011946601420640945, "signal/frontier_entropy_batch_reward/weight": 0.17782924175262452, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011946601420640945, "step": 160 }, { "adaptive_ema/accuracy_reward": 0.4325205851242789, "adaptive_ema/brier_reward": 0.6788407846972966, "adaptive_ema/format_reward": 0.9277763152493026, "adaptive_ema/frontier_aurc_reward": 0.06739938548177218, "adaptive_ema/frontier_coverage_1": 0.1604166561415282, "adaptive_ema/frontier_coverage_10": 0.1604166561415282, "adaptive_ema/frontier_coverage_15": 0.1604166561415282, "adaptive_ema/frontier_coverage_20": 0.16033269429267963, "adaptive_ema/frontier_coverage_25": 0.1555716756944678, "adaptive_ema/frontier_coverage_5": 0.1604166561415282, "adaptive_ema/frontier_ece_reward": 0.07779708224033675, "adaptive_ema/frontier_entropy_batch_reward": -0.15159694629667597, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.049429801851511, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.0205545574426651, "adaptive_weight/frontier_coverage_1": 0.01850444935262203, "adaptive_weight/frontier_coverage_10": 0.01850444935262203, "adaptive_weight/frontier_coverage_15": 0.01850444935262203, "adaptive_weight/frontier_coverage_20": 0.018506300821900367, "adaptive_weight/frontier_coverage_25": 0.01861123591661453, "adaptive_weight/frontier_coverage_5": 0.01850444935262203, "adaptive_weight/frontier_ece_reward": 0.14193709790706635, "adaptive_weight/frontier_entropy_batch_reward": 0.1772432029247284, "calibration/aurc": 0.22409343936084275, "calibration/batch_distribution_entropy": 0.9689437318170097, "calibration/batch_entropy_100bins": 0.6919291075244673, "calibration/batch_entropy_10bins": 0.9689437318170097, "calibration/batch_entropy_50bins": 0.7959131008887249, "calibration/batch_uniqueness": 0.8983062744140625, "calibration/buffer_distribution_entropy": 0.9754367199459588, "calibration/buffer_entropy_100bins": 0.7101681510025485, "calibration/buffer_entropy_10bins": 0.9754367199459588, "calibration/buffer_entropy_50bins": 0.8092072534086444, "calibration/confidence_entropy": 0.47571831432345046, "calibration/coverage@0%": 0.01328125, "calibration/coverage@1%": 0.01328125, "calibration/coverage@10%": 0.209375, "calibration/coverage@15%": 0.387109375, "calibration/coverage@20%": 0.5140625, "calibration/coverage@25%": 0.62734375, "calibration/coverage@30%": 0.703125, "calibration/coverage@5%": 0.084375, "calibration/ece": 0.12249218750000002, "calibration/mean_confidence": 0.5035312499999999, "calibration/prompt_uniqueness": 0.755419921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/max_terminated_length": 471.0, "completions/mean_length": 204.42216796875, "completions/mean_terminated_length": 204.42216796875, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.528, "grad_norm": 0.0020615458488464355, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 559234651.0, "reward": 0.809805941581726, "reward_std": 0.08527890592813492, "rewards/accuracy_reward": 0.5328125, "rewards/brier_reward": 0.7901050925254822, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002666196506470442, "rewards/frontier_coverage_1": 0.11995811313390732, "rewards/frontier_coverage_10": 0.11995811313390732, "rewards/frontier_coverage_15": 0.11995811313390732, "rewards/frontier_coverage_20": 0.11858220845460891, "rewards/frontier_coverage_25": 0.08949183002114296, "rewards/frontier_coverage_5": 0.11995811313390732, "rewards/frontier_ece_reward": 0.007980644144117833, "rewards/frontier_entropy_batch_reward": -0.05378929451107979, "signal/accuracy_reward/centered_abs_mean": 0.1185791015625, "signal/accuracy_reward/group_bin_occupancy": 0.178125, "signal/accuracy_reward/group_std_mean": 0.15306617319583893, "signal/accuracy_reward/group_zero_std_frac": 0.575, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05928955078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05928955078125, "signal/advantage_abs_mean": 0.06676331087946892, "signal/advantage_pre_scale_abs_mean": 0.06676331087946892, "signal/advantage_pre_scale_std": 0.1093256339430809, "signal/advantage_std": 0.1093256339430809, "signal/brier_reward/centered_abs_mean": 0.14486051201820374, "signal/brier_reward/group_bin_occupancy": 0.81328125, "signal/brier_reward/group_std_mean": 0.18578538298606873, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.007160227932035923, "signal/brier_reward/weight": 0.049429801851511, "signal/brier_reward/weighted_centered_abs_mean": 0.007160227932035923, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002835435047745705, "signal/frontier_aurc_reward/group_bin_occupancy": 0.689453125, "signal/frontier_aurc_reward/group_std_mean": 0.004600296774879098, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.827866843901575e-05, "signal/frontier_aurc_reward/weight": 0.0205545574426651, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.827866843901575e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2021091252565384, "signal/frontier_coverage_1/group_bin_occupancy": 0.84921875, "signal/frontier_coverage_1/group_std_mean": 0.2615320235490799, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037399885710328817, "signal/frontier_coverage_1/weight": 0.01850444935262203, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037399885710328817, "signal/frontier_coverage_10/centered_abs_mean": 0.2021091252565384, "signal/frontier_coverage_10/group_bin_occupancy": 0.84921875, "signal/frontier_coverage_10/group_std_mean": 0.2615320235490799, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037399885710328817, "signal/frontier_coverage_10/weight": 0.01850444935262203, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037399885710328817, "signal/frontier_coverage_15/centered_abs_mean": 0.2021091252565384, "signal/frontier_coverage_15/group_bin_occupancy": 0.84921875, "signal/frontier_coverage_15/group_std_mean": 0.2615320235490799, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037399885710328817, "signal/frontier_coverage_15/weight": 0.01850444935262203, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037399885710328817, "signal/frontier_coverage_20/centered_abs_mean": 0.1976030260324478, "signal/frontier_coverage_20/group_bin_occupancy": 0.84609375, "signal/frontier_coverage_20/group_std_mean": 0.2558705747127533, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036569551099091767, "signal/frontier_coverage_20/weight": 0.018506300821900367, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036569551099091767, "signal/frontier_coverage_25/centered_abs_mean": 0.1361584484577179, "signal/frontier_coverage_25/group_bin_occupancy": 0.84140625, "signal/frontier_coverage_25/group_std_mean": 0.17853571772575377, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002534055197611451, "signal/frontier_coverage_25/weight": 0.01861123591661453, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002534055197611451, "signal/frontier_coverage_5/centered_abs_mean": 0.2021091252565384, "signal/frontier_coverage_5/group_bin_occupancy": 0.84921875, "signal/frontier_coverage_5/group_std_mean": 0.2615320235490799, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037399885710328817, "signal/frontier_coverage_5/weight": 0.01850444935262203, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037399885710328817, "signal/frontier_ece_reward/centered_abs_mean": 0.01067428793758154, "signal/frontier_ece_reward/group_bin_occupancy": 0.76875, "signal/frontier_ece_reward/group_std_mean": 0.01351653877645731, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015151004772633314, "signal/frontier_ece_reward/weight": 0.14193709790706635, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015151004772633314, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07872896939516068, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.567578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10164814293384553, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013953628391027451, "signal/frontier_entropy_batch_reward/weight": 0.1772432029247284, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013953628391027451, "step": 165 }, { "adaptive_ema/accuracy_reward": 0.4383067569373906, "adaptive_ema/brier_reward": 0.6838993677795594, "adaptive_ema/format_reward": 0.931300554007238, "adaptive_ema/frontier_aurc_reward": 0.06395956977261932, "adaptive_ema/frontier_coverage_1": 0.15742099640782753, "adaptive_ema/frontier_coverage_10": 0.15742099640782753, "adaptive_ema/frontier_coverage_15": 0.15742099640782753, "adaptive_ema/frontier_coverage_20": 0.157249171840029, "adaptive_ema/frontier_coverage_25": 0.15150339712214747, "adaptive_ema/frontier_coverage_5": 0.15742099640782753, "adaptive_ema/frontier_ece_reward": 0.07434109182543788, "adaptive_ema/frontier_entropy_batch_reward": -0.14720343264181412, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04869266897439957, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.020647920295596124, "adaptive_weight/frontier_coverage_1": 0.01858627498149872, "adaptive_weight/frontier_coverage_10": 0.01858627498149872, "adaptive_weight/frontier_coverage_15": 0.01858627498149872, "adaptive_weight/frontier_coverage_20": 0.018590064719319344, "adaptive_weight/frontier_coverage_25": 0.018716811016201974, "adaptive_weight/frontier_coverage_5": 0.01858627498149872, "adaptive_weight/frontier_ece_reward": 0.1425902009010315, "adaptive_weight/frontier_entropy_batch_reward": 0.17671723365783693, "calibration/aurc": 0.25509466056467095, "calibration/batch_distribution_entropy": 0.9725157625345979, "calibration/batch_entropy_100bins": 0.7019710805310249, "calibration/batch_entropy_10bins": 0.9725157625345979, "calibration/batch_entropy_50bins": 0.8013937346369447, "calibration/batch_uniqueness": 0.8974367551701217, "calibration/buffer_distribution_entropy": 0.9759876725866707, "calibration/buffer_entropy_100bins": 0.7097671052229481, "calibration/buffer_entropy_10bins": 0.9759876725866707, "calibration/buffer_entropy_50bins": 0.808892690946708, "calibration/confidence_entropy": 0.450354472679105, "calibration/coverage@0%": 0.011728687622309197, "calibration/coverage@1%": 0.011728687622309197, "calibration/coverage@10%": 0.08204118762230919, "calibration/coverage@15%": 0.2794092465753425, "calibration/coverage@20%": 0.4244251467710372, "calibration/coverage@25%": 0.5561261619373777, "calibration/coverage@30%": 0.6436651479941291, "calibration/coverage@5%": 0.011728687622309197, "calibration/ece": 0.11073061399217224, "calibration/mean_confidence": 0.4943483213062622, "calibration/prompt_uniqueness": 0.7487230505332987, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1145.2, "completions/max_terminated_length": 651.2, "completions/mean_length": 208.3013671875, "completions/mean_terminated_length": 207.65260314941406, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.544, "grad_norm": 0.0009282738319598138, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 576531241.0, "reward": 0.8175342917442322, "reward_std": 0.09144499897956848, "rewards/accuracy_reward": 0.55869140625, "rewards/brier_reward": 0.7725163578987122, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.002964147459715605, "rewards/frontier_coverage_1": 0.08193773776292801, "rewards/frontier_coverage_10": 0.08193773776292801, "rewards/frontier_coverage_15": 0.08193773776292801, "rewards/frontier_coverage_20": 0.08030623197555542, "rewards/frontier_coverage_25": 0.05987264439463615, "rewards/frontier_coverage_5": 0.08193773776292801, "rewards/frontier_ece_reward": 0.006552364397794008, "rewards/frontier_entropy_batch_reward": -0.05003713071346283, "signal/accuracy_reward/centered_abs_mean": 0.129425048828125, "signal/accuracy_reward/group_bin_occupancy": 0.1859375, "signal/accuracy_reward/group_std_mean": 0.1712253749370575, "signal/accuracy_reward/group_zero_std_frac": 0.5125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0647125244140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0647125244140625, "signal/advantage_abs_mean": 0.06956043541431427, "signal/advantage_pre_scale_abs_mean": 0.06956043541431427, "signal/advantage_pre_scale_std": 0.11212797164916992, "signal/advantage_std": 0.11212797164916992, "signal/brier_reward/centered_abs_mean": 0.15993968546390533, "signal/brier_reward/group_bin_occupancy": 0.828515625, "signal/brier_reward/group_std_mean": 0.20379654169082642, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.007788042444735766, "signal/brier_reward/weight": 0.04869266897439957, "signal/brier_reward/weighted_centered_abs_mean": 0.007788042444735766, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003263408271595836, "signal/frontier_aurc_reward/group_bin_occupancy": 0.670703125, "signal/frontier_aurc_reward/group_std_mean": 0.005496641155332327, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.738151423633099e-05, "signal/frontier_aurc_reward/weight": 0.020647920295596124, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.738151423633099e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2161277115345001, "signal/frontier_coverage_1/group_bin_occupancy": 0.859375, "signal/frontier_coverage_1/group_std_mean": 0.2762247920036316, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004016902018338442, "signal/frontier_coverage_1/weight": 0.01858627498149872, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004016902018338442, "signal/frontier_coverage_10/centered_abs_mean": 0.2161277115345001, "signal/frontier_coverage_10/group_bin_occupancy": 0.859375, "signal/frontier_coverage_10/group_std_mean": 0.2762247920036316, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004016902018338442, "signal/frontier_coverage_10/weight": 0.01858627498149872, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004016902018338442, "signal/frontier_coverage_15/centered_abs_mean": 0.2161277115345001, "signal/frontier_coverage_15/group_bin_occupancy": 0.859375, "signal/frontier_coverage_15/group_std_mean": 0.2762247920036316, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004016902018338442, "signal/frontier_coverage_15/weight": 0.01858627498149872, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004016902018338442, "signal/frontier_coverage_20/centered_abs_mean": 0.2121141731739044, "signal/frontier_coverage_20/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_20/group_std_mean": 0.27119354605674745, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039430524222552775, "signal/frontier_coverage_20/weight": 0.018590064719319344, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039430524222552775, "signal/frontier_coverage_25/centered_abs_mean": 0.14031263887882234, "signal/frontier_coverage_25/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_25/group_std_mean": 0.18075886964797974, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026261318940669297, "signal/frontier_coverage_25/weight": 0.018716811016201974, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026261318940669297, "signal/frontier_coverage_5/centered_abs_mean": 0.2161277115345001, "signal/frontier_coverage_5/group_bin_occupancy": 0.859375, "signal/frontier_coverage_5/group_std_mean": 0.2762247920036316, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004016902018338442, "signal/frontier_coverage_5/weight": 0.01858627498149872, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004016902018338442, "signal/frontier_ece_reward/centered_abs_mean": 0.011203336343169213, "signal/frontier_ece_reward/group_bin_occupancy": 0.78828125, "signal/frontier_ece_reward/group_std_mean": 0.014098120294511319, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015974577516317367, "signal/frontier_ece_reward/weight": 0.1425902009010315, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015974577516317367, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07690173387527466, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.54921875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09812586307525635, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013595831673592329, "signal/frontier_entropy_batch_reward/weight": 0.17671723365783693, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013595831673592329, "step": 170 }, { "adaptive_ema/accuracy_reward": 0.4436072018924998, "adaptive_ema/brier_reward": 0.6883438409870493, "adaptive_ema/format_reward": 0.9346330694496237, "adaptive_ema/frontier_aurc_reward": 0.06068736120478294, "adaptive_ema/frontier_coverage_1": 0.15437627981998278, "adaptive_ema/frontier_coverage_10": 0.15437627981998278, "adaptive_ema/frontier_coverage_15": 0.15437627981998278, "adaptive_ema/frontier_coverage_20": 0.15415214953497286, "adaptive_ema/frontier_coverage_25": 0.14754116154013136, "adaptive_ema/frontier_coverage_5": 0.15437627981998278, "adaptive_ema/frontier_ece_reward": 0.07102756252520259, "adaptive_ema/frontier_entropy_batch_reward": -0.14206769968694552, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.048053061962127684, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.020739533379673956, "adaptive_weight/frontier_coverage_1": 0.018670928850769996, "adaptive_weight/frontier_coverage_10": 0.018670928850769996, "adaptive_weight/frontier_coverage_15": 0.018670928850769996, "adaptive_weight/frontier_coverage_20": 0.018675877153873442, "adaptive_weight/frontier_coverage_25": 0.018821846693754196, "adaptive_weight/frontier_coverage_5": 0.018670928850769996, "adaptive_weight/frontier_ece_reward": 0.14323482811450958, "adaptive_weight/frontier_entropy_batch_reward": 0.17609113454818726, "calibration/aurc": 0.2730237709859521, "calibration/batch_distribution_entropy": 0.9713218020962746, "calibration/batch_entropy_100bins": 0.7193840849266884, "calibration/batch_entropy_10bins": 0.9713218020962746, "calibration/batch_entropy_50bins": 0.8190414280746543, "calibration/batch_uniqueness": 0.9050258267993678, "calibration/buffer_distribution_entropy": 0.9767955435357575, "calibration/buffer_entropy_100bins": 0.710864333067301, "calibration/buffer_entropy_10bins": 0.9767955435357575, "calibration/buffer_entropy_50bins": 0.8100096000160144, "calibration/confidence_entropy": 0.46547111782143746, "calibration/coverage@0%": 0.005475629892367906, "calibration/coverage@1%": 0.005475629892367906, "calibration/coverage@10%": 0.19846272627201564, "calibration/coverage@15%": 0.3146312377690802, "calibration/coverage@20%": 0.40383057118395305, "calibration/coverage@25%": 0.48442163038160474, "calibration/coverage@30%": 0.5344919581702544, "calibration/coverage@5%": 0.1410225048923679, "calibration/ece": 0.15978001365786038, "calibration/mean_confidence": 0.44530827829419434, "calibration/prompt_uniqueness": 0.7652296496975806, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1149.0, "completions/max_terminated_length": 646.4, "completions/mean_length": 208.6453125, "completions/mean_terminated_length": 208.12649230957032, "completions/min_length": 94.2, "completions/min_terminated_length": 94.2, "epoch": 0.56, "grad_norm": 0.0010633780620992184, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 593489177.0, "reward": 0.8009234666824341, "reward_std": 0.08025546967983246, "rewards/accuracy_reward": 0.52197265625, "rewards/brier_reward": 0.7800888299942017, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0029034676495939492, "rewards/frontier_coverage_1": 0.11806583553552627, "rewards/frontier_coverage_10": 0.11806583553552627, "rewards/frontier_coverage_15": 0.11806583553552627, "rewards/frontier_coverage_20": 0.11465947777032852, "rewards/frontier_coverage_25": 0.08335476815700531, "rewards/frontier_coverage_5": 0.11806583553552627, "rewards/frontier_ece_reward": 0.007106996979564429, "rewards/frontier_entropy_batch_reward": -0.0612617876380682, "signal/accuracy_reward/centered_abs_mean": 0.096990966796875, "signal/accuracy_reward/group_bin_occupancy": 0.176953125, "signal/accuracy_reward/group_std_mean": 0.1360134780406952, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0484954833984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0484954833984375, "signal/advantage_abs_mean": 0.05966150388121605, "signal/advantage_pre_scale_abs_mean": 0.05966150388121605, "signal/advantage_pre_scale_std": 0.10060604065656661, "signal/advantage_std": 0.10060604065656661, "signal/brier_reward/centered_abs_mean": 0.15158625245094298, "signal/brier_reward/group_bin_occupancy": 0.81875, "signal/brier_reward/group_std_mean": 0.19490756690502167, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0072855155915021895, "signal/brier_reward/weight": 0.048053061962127684, "signal/brier_reward/weighted_centered_abs_mean": 0.0072855155915021895, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003106844192370772, "signal/frontier_aurc_reward/group_bin_occupancy": 0.68359375, "signal/frontier_aurc_reward/group_std_mean": 0.005209229234606027, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.443956008297391e-05, "signal/frontier_aurc_reward/weight": 0.020739533379673956, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.443956008297391e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20348803400993348, "signal/frontier_coverage_1/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_1/group_std_mean": 0.2632997930049896, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037990906741470097, "signal/frontier_coverage_1/weight": 0.018670928850769996, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037990906741470097, "signal/frontier_coverage_10/centered_abs_mean": 0.20348803400993348, "signal/frontier_coverage_10/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_10/group_std_mean": 0.2632997930049896, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037990906741470097, "signal/frontier_coverage_10/weight": 0.018670928850769996, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037990906741470097, "signal/frontier_coverage_15/centered_abs_mean": 0.20348803400993348, "signal/frontier_coverage_15/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_15/group_std_mean": 0.2632997930049896, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037990906741470097, "signal/frontier_coverage_15/weight": 0.018670928850769996, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037990906741470097, "signal/frontier_coverage_20/centered_abs_mean": 0.1926076591014862, "signal/frontier_coverage_20/group_bin_occupancy": 0.848828125, "signal/frontier_coverage_20/group_std_mean": 0.24944129288196565, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035968903452157975, "signal/frontier_coverage_20/weight": 0.018675877153873442, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035968903452157975, "signal/frontier_coverage_25/centered_abs_mean": 0.1296244353055954, "signal/frontier_coverage_25/group_bin_occupancy": 0.85625, "signal/frontier_coverage_25/group_std_mean": 0.16898023784160615, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024395577609539034, "signal/frontier_coverage_25/weight": 0.018821846693754196, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024395577609539034, "signal/frontier_coverage_5/centered_abs_mean": 0.20348803400993348, "signal/frontier_coverage_5/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_5/group_std_mean": 0.2632997930049896, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037990906741470097, "signal/frontier_coverage_5/weight": 0.018670928850769996, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037990906741470097, "signal/frontier_ece_reward/centered_abs_mean": 0.010423007607460021, "signal/frontier_ece_reward/group_bin_occupancy": 0.778515625, "signal/frontier_ece_reward/group_std_mean": 0.013230705820024013, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001492859236896038, "signal/frontier_ece_reward/weight": 0.14323482811450958, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001492859236896038, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08325772732496262, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.55859375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10612156391143798, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014658915996551513, "signal/frontier_entropy_batch_reward/weight": 0.17609113454818726, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014658915996551513, "step": 175 }, { "adaptive_ema/accuracy_reward": 0.44623034630379654, "adaptive_ema/brier_reward": 0.6926405104345207, "adaptive_ema/format_reward": 0.937817631708912, "adaptive_ema/frontier_aurc_reward": 0.05754573219740727, "adaptive_ema/frontier_coverage_1": 0.15310669916635733, "adaptive_ema/frontier_coverage_10": 0.15310669916635733, "adaptive_ema/frontier_coverage_15": 0.15310669916635733, "adaptive_ema/frontier_coverage_20": 0.1524453964077967, "adaptive_ema/frontier_coverage_25": 0.14439836258841363, "adaptive_ema/frontier_coverage_5": 0.15310669916635733, "adaptive_ema/frontier_ece_reward": 0.06787384748708213, "adaptive_ema/frontier_entropy_batch_reward": -0.13817989528525737, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04743674695491791, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.020829180255532265, "adaptive_weight/frontier_coverage_1": 0.018717186525464058, "adaptive_weight/frontier_coverage_10": 0.018717186525464058, "adaptive_weight/frontier_coverage_15": 0.018717186525464058, "adaptive_weight/frontier_coverage_20": 0.0187318030744791, "adaptive_weight/frontier_coverage_25": 0.018909652531147004, "adaptive_weight/frontier_coverage_5": 0.018717186525464058, "adaptive_weight/frontier_ece_reward": 0.14386117160320283, "adaptive_weight/frontier_entropy_batch_reward": 0.17566269338130952, "calibration/aurc": 0.3368074206827616, "calibration/batch_distribution_entropy": 0.9719911527106417, "calibration/batch_entropy_100bins": 0.7169152619774707, "calibration/batch_entropy_10bins": 0.9719911527106417, "calibration/batch_entropy_50bins": 0.815783581914066, "calibration/batch_uniqueness": 0.9021641926185449, "calibration/buffer_distribution_entropy": 0.9770301781043716, "calibration/buffer_entropy_100bins": 0.7121975254743294, "calibration/buffer_entropy_10bins": 0.9770301781043716, "calibration/buffer_entropy_50bins": 0.8112773883011256, "calibration/confidence_entropy": 0.47810382196565415, "calibration/coverage@0%": 0.003126528864970646, "calibration/coverage@1%": 0.003126528864970646, "calibration/coverage@10%": 0.05937652886497065, "calibration/coverage@15%": 0.10941398605675148, "calibration/coverage@20%": 0.2547303082191781, "calibration/coverage@25%": 0.3168396832191781, "calibration/coverage@30%": 0.39585906922700587, "calibration/coverage@5%": 0.003126528864970646, "calibration/ece": 0.09531413935390712, "calibration/mean_confidence": 0.4680983178378737, "calibration/prompt_uniqueness": 0.7562092506178459, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 753.2, "completions/max_terminated_length": 538.4, "completions/mean_length": 210.11298828125, "completions/mean_terminated_length": 209.98328247070313, "completions/min_length": 86.6, "completions/min_terminated_length": 86.6, "epoch": 0.576, "grad_norm": 0.00579429604113102, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 610827358.0, "reward": 0.7945305824279785, "reward_std": 0.07392666339874268, "rewards/accuracy_reward": 0.50703125, "rewards/brier_reward": 0.775688111782074, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0032747992780059578, "rewards/frontier_coverage_1": 0.12220384031534196, "rewards/frontier_coverage_10": 0.12220384031534196, "rewards/frontier_coverage_15": 0.12220384031534196, "rewards/frontier_coverage_20": 0.11271904930472373, "rewards/frontier_coverage_25": 0.07997815757989883, "rewards/frontier_coverage_5": 0.12220384031534196, "rewards/frontier_ece_reward": 0.00646611051633954, "rewards/frontier_entropy_batch_reward": -0.053216959536075595, "signal/accuracy_reward/centered_abs_mean": 0.08941650390625, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.123899807035923, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044708251953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044708251953125, "signal/advantage_abs_mean": 0.05529655441641808, "signal/advantage_pre_scale_abs_mean": 0.05529655441641808, "signal/advantage_pre_scale_std": 0.0951567530632019, "signal/advantage_std": 0.0951567530632019, "signal/brier_reward/centered_abs_mean": 0.1453737199306488, "signal/brier_reward/group_bin_occupancy": 0.8125, "signal/brier_reward/group_std_mean": 0.18699900507926942, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006896778661757708, "signal/brier_reward/weight": 0.04743674695491791, "signal/brier_reward/weighted_centered_abs_mean": 0.006896778661757708, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003002939047291875, "signal/frontier_aurc_reward/group_bin_occupancy": 0.691015625, "signal/frontier_aurc_reward/group_std_mean": 0.004798801522701979, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.253647443372756e-05, "signal/frontier_aurc_reward/weight": 0.020829180255532265, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.253647443372756e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19401188492774962, "signal/frontier_coverage_1/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_1/group_std_mean": 0.2502962052822113, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036313798744231464, "signal/frontier_coverage_1/weight": 0.018717186525464058, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036313798744231464, "signal/frontier_coverage_10/centered_abs_mean": 0.19401188492774962, "signal/frontier_coverage_10/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_10/group_std_mean": 0.2502962052822113, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036313798744231464, "signal/frontier_coverage_10/weight": 0.018717186525464058, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036313798744231464, "signal/frontier_coverage_15/centered_abs_mean": 0.19401188492774962, "signal/frontier_coverage_15/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_15/group_std_mean": 0.2502962052822113, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036313798744231464, "signal/frontier_coverage_15/weight": 0.018717186525464058, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036313798744231464, "signal/frontier_coverage_20/centered_abs_mean": 0.18169912993907927, "signal/frontier_coverage_20/group_bin_occupancy": 0.84765625, "signal/frontier_coverage_20/group_std_mean": 0.23473725318908692, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034035469871014357, "signal/frontier_coverage_20/weight": 0.0187318030744791, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034035469871014357, "signal/frontier_coverage_25/centered_abs_mean": 0.12429940104484558, "signal/frontier_coverage_25/group_bin_occupancy": 0.837890625, "signal/frontier_coverage_25/group_std_mean": 0.16169273257255554, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002350442623719573, "signal/frontier_coverage_25/weight": 0.018909652531147004, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002350442623719573, "signal/frontier_coverage_5/centered_abs_mean": 0.19401188492774962, "signal/frontier_coverage_5/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_5/group_std_mean": 0.2502962052822113, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036313798744231464, "signal/frontier_coverage_5/weight": 0.018717186525464058, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036313798744231464, "signal/frontier_ece_reward/centered_abs_mean": 0.009726777486503125, "signal/frontier_ece_reward/group_bin_occupancy": 0.75625, "signal/frontier_ece_reward/group_std_mean": 0.012411239556968212, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013992791762575507, "signal/frontier_ece_reward/weight": 0.14386117160320283, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013992791762575507, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07289832383394242, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.576953125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09288608580827713, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012805469892919063, "signal/frontier_entropy_batch_reward/weight": 0.17566269338130952, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012805469892919063, "step": 180 }, { "adaptive_ema/accuracy_reward": 0.4493698127133222, "adaptive_ema/brier_reward": 0.6970354247086294, "adaptive_ema/format_reward": 0.9408555370667686, "adaptive_ema/frontier_aurc_reward": 0.05457625015351719, "adaptive_ema/frontier_coverage_1": 0.15179881337716356, "adaptive_ema/frontier_coverage_10": 0.15179881337716356, "adaptive_ema/frontier_coverage_15": 0.15179881337716356, "adaptive_ema/frontier_coverage_20": 0.15061851856890654, "adaptive_ema/frontier_coverage_25": 0.1413812478201582, "adaptive_ema/frontier_coverage_5": 0.15179881337716356, "adaptive_ema/frontier_ece_reward": 0.0648879138418748, "adaptive_ema/frontier_entropy_batch_reward": -0.13473297325994987, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04680159762501716, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02091408334672451, "adaptive_weight/frontier_coverage_1": 0.018763383477926256, "adaptive_weight/frontier_coverage_10": 0.018763383477926256, "adaptive_weight/frontier_coverage_15": 0.018763383477926256, "adaptive_weight/frontier_coverage_20": 0.018789494037628175, "adaptive_weight/frontier_coverage_25": 0.018993837013840675, "adaptive_weight/frontier_coverage_5": 0.018763383477926256, "adaptive_weight/frontier_ece_reward": 0.14445513784885405, "adaptive_weight/frontier_entropy_batch_reward": 0.17529231011867524, "calibration/aurc": 0.33088350022860324, "calibration/batch_distribution_entropy": 0.9679954682002816, "calibration/batch_entropy_100bins": 0.7056431213138912, "calibration/batch_entropy_10bins": 0.9679954682002816, "calibration/batch_entropy_50bins": 0.8047735588594278, "calibration/batch_uniqueness": 0.8946394081391393, "calibration/buffer_distribution_entropy": 0.9776141899564792, "calibration/buffer_entropy_100bins": 0.7140486705625073, "calibration/buffer_entropy_10bins": 0.9776141899564792, "calibration/buffer_entropy_50bins": 0.8130714209116869, "calibration/confidence_entropy": 0.4649555720368629, "calibration/coverage@0%": 0.0265625, "calibration/coverage@1%": 0.0265625, "calibration/coverage@10%": 0.08795407289628179, "calibration/coverage@15%": 0.22830005503913892, "calibration/coverage@20%": 0.36429106531311156, "calibration/coverage@25%": 0.46940053204500976, "calibration/coverage@30%": 0.5401395853718199, "calibration/coverage@5%": 0.032421875, "calibration/ece": 0.13667109285066767, "calibration/mean_confidence": 0.46604109651994363, "calibration/prompt_uniqueness": 0.7341625138202393, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 912.2, "completions/max_terminated_length": 510.2, "completions/mean_length": 212.197265625, "completions/mean_terminated_length": 211.68154907226562, "completions/min_length": 91.8, "completions/min_terminated_length": 91.8, "epoch": 0.592, "grad_norm": 0.0012193727307021618, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 628167970.0, "reward": 0.7940310955047607, "reward_std": 0.07889594733715058, "rewards/accuracy_reward": 0.5125, "rewards/brier_reward": 0.7858376264572143, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.00308399866335094, "rewards/frontier_coverage_1": 0.12847063392400743, "rewards/frontier_coverage_10": 0.12847063392400743, "rewards/frontier_coverage_15": 0.12847063392400743, "rewards/frontier_coverage_20": 0.11759204119443893, "rewards/frontier_coverage_25": 0.08462818264961243, "rewards/frontier_coverage_5": 0.12847063392400743, "rewards/frontier_ece_reward": 0.007113531790673732, "rewards/frontier_entropy_batch_reward": -0.07595221474766731, "signal/accuracy_reward/centered_abs_mean": 0.0996337890625, "signal/accuracy_reward/group_bin_occupancy": 0.172265625, "signal/accuracy_reward/group_std_mean": 0.1323814406991005, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04981689453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04981689453125, "signal/advantage_abs_mean": 0.06043521165847778, "signal/advantage_pre_scale_abs_mean": 0.06043521165847778, "signal/advantage_pre_scale_std": 0.10175900906324387, "signal/advantage_std": 0.10175900906324387, "signal/brier_reward/centered_abs_mean": 0.1380321741104126, "signal/brier_reward/group_bin_occupancy": 0.804296875, "signal/brier_reward/group_std_mean": 0.17737728655338286, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0064600460231304165, "signal/brier_reward/weight": 0.04680159762501716, "signal/brier_reward/weighted_centered_abs_mean": 0.0064600460231304165, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086068242787, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030496908351778985, "signal/frontier_aurc_reward/group_bin_occupancy": 0.676953125, "signal/frontier_aurc_reward/group_std_mean": 0.004931708890944719, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.378361213137396e-05, "signal/frontier_aurc_reward/weight": 0.02091408334672451, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.378361213137396e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19160583317279817, "signal/frontier_coverage_1/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_1/group_std_mean": 0.24507599472999572, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035951600410044195, "signal/frontier_coverage_1/weight": 0.018763383477926256, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035951600410044195, "signal/frontier_coverage_10/centered_abs_mean": 0.19160583317279817, "signal/frontier_coverage_10/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_10/group_std_mean": 0.24507599472999572, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035951600410044195, "signal/frontier_coverage_10/weight": 0.018763383477926256, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035951600410044195, "signal/frontier_coverage_15/centered_abs_mean": 0.19160583317279817, "signal/frontier_coverage_15/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_15/group_std_mean": 0.24507599472999572, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035951600410044195, "signal/frontier_coverage_15/weight": 0.018763383477926256, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035951600410044195, "signal/frontier_coverage_20/centered_abs_mean": 0.1756891280412674, "signal/frontier_coverage_20/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_20/group_std_mean": 0.22524542212486268, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003301054053008556, "signal/frontier_coverage_20/weight": 0.018789494037628175, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003301054053008556, "signal/frontier_coverage_25/centered_abs_mean": 0.11949324905872345, "signal/frontier_coverage_25/group_bin_occupancy": 0.850390625, "signal/frontier_coverage_25/group_std_mean": 0.15409801304340362, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022695485502481462, "signal/frontier_coverage_25/weight": 0.018993837013840675, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022695485502481462, "signal/frontier_coverage_5/centered_abs_mean": 0.19160583317279817, "signal/frontier_coverage_5/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_5/group_std_mean": 0.24507599472999572, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035951600410044195, "signal/frontier_coverage_5/weight": 0.018763383477926256, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035951600410044195, "signal/frontier_ece_reward/centered_abs_mean": 0.009413376450538635, "signal/frontier_ece_reward/group_bin_occupancy": 0.753125, "signal/frontier_ece_reward/group_std_mean": 0.01191569771617651, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013597635086625814, "signal/frontier_ece_reward/weight": 0.14445513784885405, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013597635086625814, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10269325971603394, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.54453125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1308152124285698, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018002053909003734, "signal/frontier_entropy_batch_reward/weight": 0.17529231011867524, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018002053909003734, "step": 185 }, { "adaptive_ema/accuracy_reward": 0.45314128502077644, "adaptive_ema/brier_reward": 0.7017613023013852, "adaptive_ema/format_reward": 0.9437244439127237, "adaptive_ema/frontier_aurc_reward": 0.051766875792472265, "adaptive_ema/frontier_coverage_1": 0.15052466761584, "adaptive_ema/frontier_coverage_10": 0.15052466761584, "adaptive_ema/frontier_coverage_15": 0.15052466761584, "adaptive_ema/frontier_coverage_20": 0.1489783495752756, "adaptive_ema/frontier_coverage_25": 0.13857511475546141, "adaptive_ema/frontier_coverage_5": 0.15052466761584, "adaptive_ema/frontier_ece_reward": 0.06204476964928107, "adaptive_ema/frontier_entropy_batch_reward": -0.1309122085854708, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04612754210829735, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021001752093434335, "adaptive_weight/frontier_coverage_1": 0.01881442852318287, "adaptive_weight/frontier_coverage_10": 0.01881442852318287, "adaptive_weight/frontier_coverage_15": 0.01881442852318287, "adaptive_weight/frontier_coverage_20": 0.018848678469657897, "adaptive_weight/frontier_coverage_25": 0.019079096987843512, "adaptive_weight/frontier_coverage_5": 0.01881442852318287, "adaptive_weight/frontier_ece_reward": 0.14507063925266267, "adaptive_weight/frontier_entropy_batch_reward": 0.17491456866264343, "calibration/aurc": 0.2595289017864723, "calibration/batch_distribution_entropy": 0.9623724306524405, "calibration/batch_entropy_100bins": 0.6852396221141621, "calibration/batch_entropy_10bins": 0.9623724306524405, "calibration/batch_entropy_50bins": 0.7850811373268268, "calibration/batch_uniqueness": 0.8823629921002517, "calibration/buffer_distribution_entropy": 0.9781941054039418, "calibration/buffer_entropy_100bins": 0.7150254784873155, "calibration/buffer_entropy_10bins": 0.9781941054039418, "calibration/buffer_entropy_50bins": 0.8138911069544555, "calibration/confidence_entropy": 0.463122295750538, "calibration/coverage@0%": 0.016409325787401573, "calibration/coverage@1%": 0.016409325787401573, "calibration/coverage@10%": 0.21656311515748033, "calibration/coverage@15%": 0.2942974901574803, "calibration/coverage@20%": 0.43224963090551183, "calibration/coverage@25%": 0.5448080708661418, "calibration/coverage@30%": 0.6462567667322835, "calibration/coverage@5%": 0.05937807578740158, "calibration/ece": 0.10539165292814963, "calibration/mean_confidence": 0.44898269623523623, "calibration/prompt_uniqueness": 0.7121562101403061, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1081.6, "completions/max_terminated_length": 749.2, "completions/mean_length": 215.37314453125, "completions/mean_terminated_length": 214.9864929199219, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.608, "grad_norm": 0.0009518972947262228, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 645372879.0, "reward": 0.802846360206604, "reward_std": 0.07410637587308884, "rewards/accuracy_reward": 0.5197265625, "rewards/brier_reward": 0.8024592280387879, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0026110153179615737, "rewards/frontier_coverage_1": 0.14084916114807128, "rewards/frontier_coverage_10": 0.14084916114807128, "rewards/frontier_coverage_15": 0.14084916114807128, "rewards/frontier_coverage_20": 0.1310635909438133, "rewards/frontier_coverage_25": 0.09141481071710586, "rewards/frontier_coverage_5": 0.14084916114807128, "rewards/frontier_ece_reward": 0.007165602501481771, "rewards/frontier_entropy_batch_reward": -0.054858258366584776, "signal/accuracy_reward/centered_abs_mean": 0.09466552734375, "signal/accuracy_reward/group_bin_occupancy": 0.17265625, "signal/accuracy_reward/group_std_mean": 0.12934612184762956, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047332763671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.047332763671875, "signal/advantage_abs_mean": 0.05499119162559509, "signal/advantage_pre_scale_abs_mean": 0.05499119162559509, "signal/advantage_pre_scale_std": 0.09592062830924988, "signal/advantage_std": 0.09592062830924988, "signal/brier_reward/centered_abs_mean": 0.12933626472949983, "signal/brier_reward/group_bin_occupancy": 0.810546875, "signal/brier_reward/group_std_mean": 0.16631879806518554, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00596773847937584, "signal/brier_reward/weight": 0.04612754210829735, "signal/brier_reward/weighted_centered_abs_mean": 0.00596773847937584, "signal/format_reward/centered_abs_mean": 0.001251220703125, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0027073150966316463, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006256103515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006256103515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023603408131748437, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6890625, "signal/frontier_aurc_reward/group_std_mean": 0.0038498918525874614, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.957094206474722e-05, "signal/frontier_aurc_reward/weight": 0.021001752093434335, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.957094206474722e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1896822929382324, "signal/frontier_coverage_1/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_1/group_std_mean": 0.24189280569553376, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035686347633600234, "signal/frontier_coverage_1/weight": 0.01881442852318287, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035686347633600234, "signal/frontier_coverage_10/centered_abs_mean": 0.1896822929382324, "signal/frontier_coverage_10/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_10/group_std_mean": 0.24189280569553376, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035686347633600234, "signal/frontier_coverage_10/weight": 0.01881442852318287, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035686347633600234, "signal/frontier_coverage_15/centered_abs_mean": 0.1896822929382324, "signal/frontier_coverage_15/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_15/group_std_mean": 0.24189280569553376, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035686347633600234, "signal/frontier_coverage_15/weight": 0.01881442852318287, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035686347633600234, "signal/frontier_coverage_20/centered_abs_mean": 0.17528200447559356, "signal/frontier_coverage_20/group_bin_occupancy": 0.8390625, "signal/frontier_coverage_20/group_std_mean": 0.2239384174346924, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033036709763109686, "signal/frontier_coverage_20/weight": 0.018848678469657897, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033036709763109686, "signal/frontier_coverage_25/centered_abs_mean": 0.11379878968000412, "signal/frontier_coverage_25/group_bin_occupancy": 0.8375, "signal/frontier_coverage_25/group_std_mean": 0.14621945917606355, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021709577180445195, "signal/frontier_coverage_25/weight": 0.019079096987843512, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021709577180445195, "signal/frontier_coverage_5/centered_abs_mean": 0.1896822929382324, "signal/frontier_coverage_5/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_5/group_std_mean": 0.24189280569553376, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035686347633600234, "signal/frontier_coverage_5/weight": 0.01881442852318287, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035686347633600234, "signal/frontier_ece_reward/centered_abs_mean": 0.00886353775858879, "signal/frontier_ece_reward/group_bin_occupancy": 0.728515625, "signal/frontier_ece_reward/group_std_mean": 0.011153610236942769, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012857463909313083, "signal/frontier_ece_reward/weight": 0.14507063925266267, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012857463909313083, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08986649960279465, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5265625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.11396473497152329, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015717819705605506, "signal/frontier_entropy_batch_reward/weight": 0.17491456866264343, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015717819705605506, "step": 190 }, { "adaptive_ema/accuracy_reward": 0.4568694495063633, "adaptive_ema/brier_reward": 0.7067290831899434, "adaptive_ema/format_reward": 0.9464434331151395, "adaptive_ema/frontier_aurc_reward": 0.049100899068368974, "adaptive_ema/frontier_coverage_1": 0.14959135911548901, "adaptive_ema/frontier_coverage_10": 0.14959135911548901, "adaptive_ema/frontier_coverage_15": 0.14959135911548901, "adaptive_ema/frontier_coverage_20": 0.14772546548971083, "adaptive_ema/frontier_coverage_25": 0.13591206081688326, "adaptive_ema/frontier_coverage_5": 0.14959135911548901, "adaptive_ema/frontier_ece_reward": 0.05933345631108433, "adaptive_ema/frontier_entropy_batch_reward": -0.12668591113333794, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.045429503172636033, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021093441545963286, "adaptive_weight/frontier_coverage_1": 0.01886429451406002, "adaptive_weight/frontier_coverage_10": 0.01886429451406002, "adaptive_weight/frontier_coverage_15": 0.01886429451406002, "adaptive_weight/frontier_coverage_20": 0.018905685842037202, "adaptive_weight/frontier_coverage_25": 0.019167742878198623, "adaptive_weight/frontier_coverage_5": 0.01886429451406002, "adaptive_weight/frontier_ece_reward": 0.14571548700332643, "adaptive_weight/frontier_entropy_batch_reward": 0.17453096210956573, "calibration/aurc": 0.2717893891009608, "calibration/batch_distribution_entropy": 0.9790635308837882, "calibration/batch_entropy_100bins": 0.6924911901826357, "calibration/batch_entropy_10bins": 0.9790635308837882, "calibration/batch_entropy_50bins": 0.7929727048707902, "calibration/batch_uniqueness": 0.8949964966791282, "calibration/buffer_distribution_entropy": 0.9789259595572238, "calibration/buffer_entropy_100bins": 0.7148686390497873, "calibration/buffer_entropy_10bins": 0.9789259595572238, "calibration/buffer_entropy_50bins": 0.8139313132185979, "calibration/confidence_entropy": 0.4959137955381828, "calibration/coverage@0%": 0.008594514432485323, "calibration/coverage@1%": 0.008594514432485323, "calibration/coverage@10%": 0.09182821673189824, "calibration/coverage@15%": 0.2587114726027397, "calibration/coverage@20%": 0.40290866560665356, "calibration/coverage@25%": 0.490063906555773, "calibration/coverage@30%": 0.5912839408023484, "calibration/coverage@5%": 0.008594514432485323, "calibration/ece": 0.10975409017245596, "calibration/mean_confidence": 0.4828339189395792, "calibration/prompt_uniqueness": 0.7414707275136576, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 767.0, "completions/max_terminated_length": 592.4, "completions/mean_length": 214.82958984375, "completions/mean_terminated_length": 214.31283569335938, "completions/min_length": 97.6, "completions/min_terminated_length": 97.6, "epoch": 0.624, "grad_norm": 0.0009802606655284762, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 662916638.0, "reward": 0.807462728023529, "reward_std": 0.07975933402776718, "rewards/accuracy_reward": 0.5240234375, "rewards/brier_reward": 0.7986777186393738, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.002779633179306984, "rewards/frontier_coverage_1": 0.12782124131917955, "rewards/frontier_coverage_10": 0.12782124131917955, "rewards/frontier_coverage_15": 0.12782124131917955, "rewards/frontier_coverage_20": 0.11921733915805817, "rewards/frontier_coverage_25": 0.07854233682155609, "rewards/frontier_coverage_5": 0.12782124131917955, "rewards/frontier_ece_reward": 0.006253256555646658, "rewards/frontier_entropy_batch_reward": -0.0283002408221364, "signal/accuracy_reward/centered_abs_mean": 0.1125732421875, "signal/accuracy_reward/group_bin_occupancy": 0.1765625, "signal/accuracy_reward/group_std_mean": 0.14600562155246735, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05628662109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05628662109375, "signal/advantage_abs_mean": 0.06189834251999855, "signal/advantage_pre_scale_abs_mean": 0.06189834251999855, "signal/advantage_pre_scale_std": 0.10502578765153885, "signal/advantage_std": 0.10502578765153885, "signal/brier_reward/centered_abs_mean": 0.13457954227924346, "signal/brier_reward/group_bin_occupancy": 0.828515625, "signal/brier_reward/group_std_mean": 0.1729972928762436, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006113854143768549, "signal/brier_reward/weight": 0.045429503172636033, "signal/brier_reward/weighted_centered_abs_mean": 0.006113854143768549, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0024258274119347335, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026384861208498477, "signal/frontier_aurc_reward/group_bin_occupancy": 0.682421875, "signal/frontier_aurc_reward/group_std_mean": 0.004262262210249901, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5659517965978014e-05, "signal/frontier_aurc_reward/weight": 0.021093441545963286, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5659517965978014e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1896256685256958, "signal/frontier_coverage_1/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_1/group_std_mean": 0.24310458302497864, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003577108820900321, "signal/frontier_coverage_1/weight": 0.01886429451406002, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003577108820900321, "signal/frontier_coverage_10/centered_abs_mean": 0.1896256685256958, "signal/frontier_coverage_10/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_10/group_std_mean": 0.24310458302497864, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003577108820900321, "signal/frontier_coverage_10/weight": 0.01886429451406002, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003577108820900321, "signal/frontier_coverage_15/centered_abs_mean": 0.1896256685256958, "signal/frontier_coverage_15/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_15/group_std_mean": 0.24310458302497864, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003577108820900321, "signal/frontier_coverage_15/weight": 0.01886429451406002, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003577108820900321, "signal/frontier_coverage_20/centered_abs_mean": 0.17169637084007264, "signal/frontier_coverage_20/group_bin_occupancy": 0.853125, "signal/frontier_coverage_20/group_std_mean": 0.22037405371665955, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003246018523350358, "signal/frontier_coverage_20/weight": 0.018905685842037202, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003246018523350358, "signal/frontier_coverage_25/centered_abs_mean": 0.10143829137086868, "signal/frontier_coverage_25/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_25/group_std_mean": 0.13183338940143585, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019442901015281678, "signal/frontier_coverage_25/weight": 0.019167742878198623, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019442901015281678, "signal/frontier_coverage_5/centered_abs_mean": 0.1896256685256958, "signal/frontier_coverage_5/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_5/group_std_mean": 0.24310458302497864, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003577108820900321, "signal/frontier_coverage_5/weight": 0.01886429451406002, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003577108820900321, "signal/frontier_ece_reward/centered_abs_mean": 0.008129092678427697, "signal/frontier_ece_reward/group_bin_occupancy": 0.74375, "signal/frontier_ece_reward/group_std_mean": 0.0103994682431221, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001184539240784943, "signal/frontier_ece_reward/weight": 0.14571548700332643, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001184539240784943, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.060908643156290056, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.57578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.07627174183726311, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01063164221122861, "signal/frontier_entropy_batch_reward/weight": 0.17453096210956573, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01063164221122861, "step": 195 }, { "adaptive_ema/accuracy_reward": 0.4612620261651603, "adaptive_ema/brier_reward": 0.711105192442022, "adaptive_ema/format_reward": 0.9490527967788385, "adaptive_ema/frontier_aurc_reward": 0.04655472575071493, "adaptive_ema/frontier_coverage_1": 0.14780793208898826, "adaptive_ema/frontier_coverage_10": 0.14780793208898826, "adaptive_ema/frontier_coverage_15": 0.14780793208898826, "adaptive_ema/frontier_coverage_20": 0.14574844953907218, "adaptive_ema/frontier_coverage_25": 0.1326994788727595, "adaptive_ema/frontier_coverage_5": 0.14780793208898826, "adaptive_ema/frontier_ece_reward": 0.05672248766440877, "adaptive_ema/frontier_entropy_batch_reward": -0.12287724913026521, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04479944705963135, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02117249444127083, "adaptive_weight/frontier_coverage_1": 0.018924034759402276, "adaptive_weight/frontier_coverage_10": 0.018924034759402276, "adaptive_weight/frontier_coverage_15": 0.018924034759402276, "adaptive_weight/frontier_coverage_20": 0.018969768285751344, "adaptive_weight/frontier_coverage_25": 0.019259539246559144, "adaptive_weight/frontier_coverage_5": 0.018924034759402276, "adaptive_weight/frontier_ece_reward": 0.14627588391304017, "adaptive_weight/frontier_entropy_batch_reward": 0.17412672638893129, "calibration/aurc": 0.2997646550413567, "calibration/batch_distribution_entropy": 0.9725840439062055, "calibration/batch_entropy_100bins": 0.6915793095789328, "calibration/batch_entropy_10bins": 0.9725840439062055, "calibration/batch_entropy_50bins": 0.7940186093835395, "calibration/batch_uniqueness": 0.8902102105981953, "calibration/buffer_distribution_entropy": 0.9795616752685407, "calibration/buffer_entropy_100bins": 0.7146034846747893, "calibration/buffer_entropy_10bins": 0.9795616752685407, "calibration/buffer_entropy_50bins": 0.8138375318699215, "calibration/confidence_entropy": 0.4847713799679016, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.225, "calibration/coverage@15%": 0.3203125, "calibration/coverage@20%": 0.351171875, "calibration/coverage@25%": 0.3859375, "calibration/coverage@30%": 0.580859375, "calibration/coverage@5%": 0.075390625, "calibration/ece": 0.17217692025440315, "calibration/mean_confidence": 0.5315149217221136, "calibration/prompt_uniqueness": 0.7357859346546566, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1093.8, "completions/max_terminated_length": 766.4, "completions/mean_length": 219.97646484375, "completions/mean_terminated_length": 219.20455932617188, "completions/min_length": 102.8, "completions/min_terminated_length": 102.8, "epoch": 0.64, "grad_norm": 0.0011383434757590294, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 680511885.0, "reward": 0.8176485538482666, "reward_std": 0.07317476570606232, "rewards/accuracy_reward": 0.56357421875, "rewards/brier_reward": 0.7944719076156617, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.002856502798385918, "rewards/frontier_coverage_1": 0.09920123293995857, "rewards/frontier_coverage_10": 0.09920123293995857, "rewards/frontier_coverage_15": 0.09920123293995857, "rewards/frontier_coverage_20": 0.09386955350637435, "rewards/frontier_coverage_25": 0.06195079907774925, "rewards/frontier_coverage_5": 0.09920123293995857, "rewards/frontier_ece_reward": 0.005887755658477545, "rewards/frontier_entropy_batch_reward": -0.06208570748567581, "signal/accuracy_reward/centered_abs_mean": 0.090887451171875, "signal/accuracy_reward/group_bin_occupancy": 0.17109375, "signal/accuracy_reward/group_std_mean": 0.12432538270950318, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0454437255859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0454437255859375, "signal/advantage_abs_mean": 0.05513864755630493, "signal/advantage_pre_scale_abs_mean": 0.05513864755630493, "signal/advantage_pre_scale_std": 0.09758671969175339, "signal/advantage_std": 0.09758671969175339, "signal/brier_reward/centered_abs_mean": 0.12907020896673202, "signal/brier_reward/group_bin_occupancy": 0.8046875, "signal/brier_reward/group_std_mean": 0.1680096834897995, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0057831091806292536, "signal/brier_reward/weight": 0.04479944705963135, "signal/brier_reward/weighted_centered_abs_mean": 0.0057831091806292536, "signal/format_reward/centered_abs_mean": 0.00106201171875, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.002154887979850173, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000531005859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000531005859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029212284367531537, "signal/frontier_aurc_reward/group_bin_occupancy": 0.66796875, "signal/frontier_aurc_reward/group_std_mean": 0.004778983537107706, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.185173115227371e-05, "signal/frontier_aurc_reward/weight": 0.02117249444127083, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.185173115227371e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16748106777667998, "signal/frontier_coverage_1/group_bin_occupancy": 0.85078125, "signal/frontier_coverage_1/group_std_mean": 0.21731913089752197, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003169310325756669, "signal/frontier_coverage_1/weight": 0.018924034759402276, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003169310325756669, "signal/frontier_coverage_10/centered_abs_mean": 0.16748106777667998, "signal/frontier_coverage_10/group_bin_occupancy": 0.85078125, "signal/frontier_coverage_10/group_std_mean": 0.21731913089752197, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003169310325756669, "signal/frontier_coverage_10/weight": 0.018924034759402276, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003169310325756669, "signal/frontier_coverage_15/centered_abs_mean": 0.16748106777667998, "signal/frontier_coverage_15/group_bin_occupancy": 0.85078125, "signal/frontier_coverage_15/group_std_mean": 0.21731913089752197, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003169310325756669, "signal/frontier_coverage_15/weight": 0.018924034759402276, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003169310325756669, "signal/frontier_coverage_20/centered_abs_mean": 0.1481163650751114, "signal/frontier_coverage_20/group_bin_occupancy": 0.839453125, "signal/frontier_coverage_20/group_std_mean": 0.19266715049743652, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002809662418439984, "signal/frontier_coverage_20/weight": 0.018969768285751344, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002809662418439984, "signal/frontier_coverage_25/centered_abs_mean": 0.08351867049932479, "signal/frontier_coverage_25/group_bin_occupancy": 0.859375, "signal/frontier_coverage_25/group_std_mean": 0.10931529998779296, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016084318980574609, "signal/frontier_coverage_25/weight": 0.019259539246559144, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016084318980574609, "signal/frontier_coverage_5/centered_abs_mean": 0.16748106777667998, "signal/frontier_coverage_5/group_bin_occupancy": 0.85078125, "signal/frontier_coverage_5/group_std_mean": 0.21731913089752197, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003169310325756669, "signal/frontier_coverage_5/weight": 0.018924034759402276, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003169310325756669, "signal/frontier_ece_reward/centered_abs_mean": 0.007508605439215899, "signal/frontier_ece_reward/group_bin_occupancy": 0.71875, "signal/frontier_ece_reward/group_std_mean": 0.009579764865338802, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010982972104102372, "signal/frontier_ece_reward/weight": 0.14627588391304017, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010982972104102372, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08711727261543274, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.54375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10769864320755004, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015169884078204632, "signal/frontier_entropy_batch_reward/weight": 0.17412672638893129, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015169884078204632, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.5075719520741389, "eval_calibration/batch_distribution_entropy": 0.8955715306096013, "eval_calibration/batch_entropy_100bins": 0.5784960452681176, "eval_calibration/batch_entropy_10bins": 0.8955715306096013, "eval_calibration/batch_entropy_50bins": 0.6759995447475304, "eval_calibration/batch_uniqueness": 0.8388671875, "eval_calibration/buffer_distribution_entropy": 0.9798978060512744, "eval_calibration/buffer_entropy_100bins": 0.7148025692200701, "eval_calibration/buffer_entropy_10bins": 0.9798978060512744, "eval_calibration/buffer_entropy_50bins": 0.8141065825865851, "eval_calibration/confidence_entropy": 0.46110577371626493, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.0390625, "eval_calibration/coverage@20%": 0.125, "eval_calibration/coverage@25%": 0.125, "eval_calibration/coverage@30%": 0.140625, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.22523437500000001, "eval_calibration/mean_confidence": 0.464296875, "eval_calibration/prompt_uniqueness": 0.8388671875, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 377.25, "eval_completions/max_terminated_length": 377.25, "eval_completions/mean_length": 220.45864868164062, "eval_completions/mean_terminated_length": 220.45864868164062, "eval_completions/min_length": 121.25, "eval_completions/min_terminated_length": 121.25, "eval_loss": 0.0, "eval_num_tokens": 680511885.0, "eval_reward": 0.7376370877027512, "eval_reward_std": 0.2244972214102745, "eval_rewards/accuracy_reward": 0.423828125, "eval_rewards/brier_reward": 0.776837483048439, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0033958147396333516, "eval_rewards/frontier_coverage_1": 0.17307812348008156, "eval_rewards/frontier_coverage_10": 0.17307812348008156, "eval_rewards/frontier_coverage_15": 0.17307812348008156, "eval_rewards/frontier_coverage_20": 0.15501171723008156, "eval_rewards/frontier_coverage_25": 0.08437968976795673, "eval_rewards/frontier_coverage_5": 0.17307812348008156, "eval_rewards/frontier_ece_reward": 0.00601613090839237, "eval_rewards/frontier_entropy_batch_reward": -0.15737152099609375, "eval_runtime": 20.4579, "eval_samples_per_second": 24.44, "eval_signal/accuracy_reward/centered_abs_mean": 0.4764404296875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4955976828932762, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23822021484375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23822021484375, "eval_signal/advantage_abs_mean": 0.206316988915205, "eval_signal/advantage_pre_scale_abs_mean": 0.206316988915205, "eval_signal/advantage_pre_scale_std": 0.22218631953001022, "eval_signal/advantage_std": 0.22218631953001022, "eval_signal/brier_reward/centered_abs_mean": 0.20140916854143143, "eval_signal/brier_reward/group_bin_occupancy": 0.8984375, "eval_signal/brier_reward/group_std_mean": 0.24949810281395912, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.008972602896392345, "eval_signal/brier_reward/weight": 0.0445491299033165, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.008972602896392345, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004036986967548728, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7578125, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0066258255392313, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.55931230034912e-05, "eval_signal/frontier_aurc_reward/weight": 0.02120222896337509, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.55931230034912e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3758217468857765, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_1/group_std_mean": 0.46960294246673584, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007119854330085218, "eval_signal/frontier_coverage_1/weight": 0.018944764509797096, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007119854330085218, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3758217468857765, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_10/group_std_mean": 0.46960294246673584, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007119854330085218, "eval_signal/frontier_coverage_10/weight": 0.018944764509797096, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007119854330085218, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3758217468857765, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_15/group_std_mean": 0.46960294246673584, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007119854330085218, "eval_signal/frontier_coverage_15/weight": 0.018944764509797096, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007119854330085218, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3399077132344246, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_20/group_std_mean": 0.42540228366851807, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006456138915382326, "eval_signal/frontier_coverage_20/weight": 0.0189937986433506, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006456138915382326, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.1638285294175148, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_25/group_std_mean": 0.21629228815436363, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031611379818059504, "eval_signal/frontier_coverage_25/weight": 0.019295405596494675, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031611379818059504, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3758217468857765, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_5/group_std_mean": 0.46960294246673584, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007119854330085218, "eval_signal/frontier_coverage_5/weight": 0.018944764509797096, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007119854330085218, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.01058367220684886, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9296875, "eval_signal/frontier_ece_reward/group_std_mean": 0.013392903376370668, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015503591566812247, "eval_signal/frontier_ece_reward/weight": 0.14648593962192535, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015503591566812247, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21271085739135742, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5625, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.2711870074272156, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.037010504864156246, "eval_signal/frontier_entropy_batch_reward/weight": 0.17399443686008453, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.037010504864156246, "eval_steps_per_second": 0.196, "step": 200 }, { "adaptive_ema/accuracy_reward": 0.46409849546061094, "adaptive_ema/brier_reward": 0.7145793846897758, "adaptive_ema/format_reward": 0.9515267914964713, "adaptive_ema/frontier_aurc_reward": 0.04411230896135079, "adaptive_ema/frontier_coverage_1": 0.14622026044036013, "adaptive_ema/frontier_coverage_10": 0.14622026044036013, "adaptive_ema/frontier_coverage_15": 0.14622026044036013, "adaptive_ema/frontier_coverage_20": 0.1436728228617165, "adaptive_ema/frontier_coverage_25": 0.12934636947766204, "adaptive_ema/frontier_coverage_5": 0.14622026044036013, "adaptive_ema/frontier_ece_reward": 0.0542079172449618, "adaptive_ema/frontier_entropy_batch_reward": -0.11904105378724132, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04429857730865479, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021244903653860094, "adaptive_weight/frontier_coverage_1": 0.018975519016385077, "adaptive_weight/frontier_coverage_10": 0.018975519016385077, "adaptive_weight/frontier_coverage_15": 0.018975519016385077, "adaptive_weight/frontier_coverage_20": 0.019032138213515282, "adaptive_weight/frontier_coverage_25": 0.019350550696253778, "adaptive_weight/frontier_coverage_5": 0.018975519016385077, "adaptive_weight/frontier_ece_reward": 0.14679138362407684, "adaptive_weight/frontier_entropy_batch_reward": 0.1736803650856018, "calibration/aurc": 0.4643865706258444, "calibration/batch_distribution_entropy": 0.9674062131351813, "calibration/batch_entropy_100bins": 0.7019551646206266, "calibration/batch_entropy_10bins": 0.9674062131351813, "calibration/batch_entropy_50bins": 0.8050765440156435, "calibration/batch_uniqueness": 0.8971722714759565, "calibration/buffer_distribution_entropy": 0.9801938657893574, "calibration/buffer_entropy_100bins": 0.7151813913822835, "calibration/buffer_entropy_10bins": 0.9801938657893574, "calibration/buffer_entropy_50bins": 0.8145700052375018, "calibration/confidence_entropy": 0.499118723854232, "calibration/coverage@0%": 0.0019554182974559687, "calibration/coverage@1%": 0.0019554182974559687, "calibration/coverage@10%": 0.0019554182974559687, "calibration/coverage@15%": 0.0019554182974559687, "calibration/coverage@20%": 0.031653620352250486, "calibration/coverage@25%": 0.10196764921722115, "calibration/coverage@30%": 0.13126834637964774, "calibration/coverage@5%": 0.0019554182974559687, "calibration/ece": 0.12957827013098205, "calibration/mean_confidence": 0.4499429199375111, "calibration/prompt_uniqueness": 0.7584693678459937, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 742.2, "completions/max_terminated_length": 524.2, "completions/mean_length": 221.072265625, "completions/mean_terminated_length": 220.94376220703126, "completions/min_length": 77.8, "completions/min_terminated_length": 77.8, "epoch": 0.656, "grad_norm": 0.000815459294244647, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 697632209.0, "reward": 0.7870907783508301, "reward_std": 0.07756655365228653, "rewards/accuracy_reward": 0.4931640625, "rewards/brier_reward": 0.7724098086357116, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0036199231166392566, "rewards/frontier_coverage_1": 0.12345087826251984, "rewards/frontier_coverage_10": 0.12345087826251984, "rewards/frontier_coverage_15": 0.12345087826251984, "rewards/frontier_coverage_20": 0.10757764428853989, "rewards/frontier_coverage_25": 0.06548063829541206, "rewards/frontier_coverage_5": 0.12345087826251984, "rewards/frontier_ece_reward": 0.0050185761414468285, "rewards/frontier_entropy_batch_reward": -0.039812687784433365, "signal/accuracy_reward/centered_abs_mean": 0.09989013671875, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.13554909825325012, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049945068359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049945068359375, "signal/advantage_abs_mean": 0.05884964838624, "signal/advantage_pre_scale_abs_mean": 0.05884964838624, "signal/advantage_pre_scale_std": 0.10190211534500122, "signal/advantage_std": 0.10190211534500122, "signal/brier_reward/centered_abs_mean": 0.14087184071540831, "signal/brier_reward/group_bin_occupancy": 0.8328125, "signal/brier_reward/group_std_mean": 0.17969867587089539, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006240516621619463, "signal/brier_reward/weight": 0.04429857730865479, "signal/brier_reward/weighted_centered_abs_mean": 0.006240516621619463, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.003362104669213295, "signal/frontier_aurc_reward/group_bin_occupancy": 0.671484375, "signal/frontier_aurc_reward/group_std_mean": 0.005673701735213399, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.1434101118939e-05, "signal/frontier_aurc_reward/weight": 0.021244903653860094, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.1434101118939e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18643584847450256, "signal/frontier_coverage_1/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_1/group_std_mean": 0.24093481302261352, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003537702839821577, "signal/frontier_coverage_1/weight": 0.018975519016385077, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003537702839821577, "signal/frontier_coverage_10/centered_abs_mean": 0.18643584847450256, "signal/frontier_coverage_10/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_10/group_std_mean": 0.24093481302261352, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003537702839821577, "signal/frontier_coverage_10/weight": 0.018975519016385077, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003537702839821577, "signal/frontier_coverage_15/centered_abs_mean": 0.18643584847450256, "signal/frontier_coverage_15/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_15/group_std_mean": 0.24093481302261352, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003537702839821577, "signal/frontier_coverage_15/weight": 0.018975519016385077, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003537702839821577, "signal/frontier_coverage_20/centered_abs_mean": 0.1628478139638901, "signal/frontier_coverage_20/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_20/group_std_mean": 0.21116604804992675, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003099315334111452, "signal/frontier_coverage_20/weight": 0.019032138213515282, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003099315334111452, "signal/frontier_coverage_25/centered_abs_mean": 0.09084706604480744, "signal/frontier_coverage_25/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_25/group_std_mean": 0.11897308528423309, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017579408595338464, "signal/frontier_coverage_25/weight": 0.019350550696253778, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017579408595338464, "signal/frontier_coverage_5/centered_abs_mean": 0.18643584847450256, "signal/frontier_coverage_5/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_5/group_std_mean": 0.24093481302261352, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003537702839821577, "signal/frontier_coverage_5/weight": 0.018975519016385077, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003537702839821577, "signal/frontier_ece_reward/centered_abs_mean": 0.007442523166537285, "signal/frontier_ece_reward/group_bin_occupancy": 0.75546875, "signal/frontier_ece_reward/group_std_mean": 0.009600348770618439, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001092497631907463, "signal/frontier_ece_reward/weight": 0.14679138362407684, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001092497631907463, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06799670606851578, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.538671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.08818065077066421, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01180825624614954, "signal/frontier_entropy_batch_reward/weight": 0.1736803650856018, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01180825624614954, "step": 205 }, { "adaptive_ema/accuracy_reward": 0.4662320707951559, "adaptive_ema/brier_reward": 0.7176291992470631, "adaptive_ema/format_reward": 0.9538852598740333, "adaptive_ema/frontier_aurc_reward": 0.041774003972822785, "adaptive_ema/frontier_coverage_1": 0.14537093734430143, "adaptive_ema/frontier_coverage_10": 0.14537093734430143, "adaptive_ema/frontier_coverage_15": 0.14537093734430143, "adaptive_ema/frontier_coverage_20": 0.1422973893660083, "adaptive_ema/frontier_coverage_25": 0.12662304782701647, "adaptive_ema/frontier_coverage_5": 0.14537093734430143, "adaptive_ema/frontier_ece_reward": 0.05181695449107977, "adaptive_ema/frontier_entropy_batch_reward": -0.11663399660211912, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.043847785145044324, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021307827904820442, "adaptive_weight/frontier_coverage_1": 0.01900416538119316, "adaptive_weight/frontier_coverage_10": 0.01900416538119316, "adaptive_weight/frontier_coverage_15": 0.01900416538119316, "adaptive_weight/frontier_coverage_20": 0.019072511792182924, "adaptive_weight/frontier_coverage_25": 0.019421060755848884, "adaptive_weight/frontier_coverage_5": 0.01900416538119316, "adaptive_weight/frontier_ece_reward": 0.14723817110061646, "adaptive_weight/frontier_entropy_batch_reward": 0.17339597642421722, "calibration/aurc": 0.31164496797877933, "calibration/batch_distribution_entropy": 0.9663074056547905, "calibration/batch_entropy_100bins": 0.6888697683751974, "calibration/batch_entropy_10bins": 0.9663074056547905, "calibration/batch_entropy_50bins": 0.7888140527081898, "calibration/batch_uniqueness": 0.8817779541015625, "calibration/buffer_distribution_entropy": 0.9812080629481033, "calibration/buffer_entropy_100bins": 0.7160872698038366, "calibration/buffer_entropy_10bins": 0.9812080629481033, "calibration/buffer_entropy_50bins": 0.8154865216539655, "calibration/confidence_entropy": 0.45991284860584025, "calibration/coverage@0%": 0.01796875, "calibration/coverage@1%": 0.01796875, "calibration/coverage@10%": 0.13046875, "calibration/coverage@15%": 0.178515625, "calibration/coverage@20%": 0.26328125, "calibration/coverage@25%": 0.31796875, "calibration/coverage@30%": 0.443359375, "calibration/coverage@5%": 0.037890625, "calibration/ece": 0.145015625, "calibration/mean_confidence": 0.467203125, "calibration/prompt_uniqueness": 0.693310546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 968.0, "completions/max_terminated_length": 533.2, "completions/mean_length": 220.28994140625, "completions/mean_terminated_length": 220.0336151123047, "completions/min_length": 95.2, "completions/min_terminated_length": 95.2, "epoch": 0.672, "grad_norm": 0.0017700539901852608, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 714801418.0, "reward": 0.7908611178398133, "reward_std": 0.07649894952774047, "rewards/accuracy_reward": 0.5140625, "rewards/brier_reward": 0.7871044278144836, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.00326902256347239, "rewards/frontier_coverage_1": 0.1457797884941101, "rewards/frontier_coverage_10": 0.1457797884941101, "rewards/frontier_coverage_15": 0.1457797884941101, "rewards/frontier_coverage_20": 0.13143573254346846, "rewards/frontier_coverage_25": 0.08514134362339973, "rewards/frontier_coverage_5": 0.1457797884941101, "rewards/frontier_ece_reward": 0.006153030414134264, "rewards/frontier_entropy_batch_reward": -0.09628089219331741, "signal/accuracy_reward/centered_abs_mean": 0.1017822265625, "signal/accuracy_reward/group_bin_occupancy": 0.17421875, "signal/accuracy_reward/group_std_mean": 0.13586196452379226, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05089111328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05089111328125, "signal/advantage_abs_mean": 0.05836946368217468, "signal/advantage_pre_scale_abs_mean": 0.05836946368217468, "signal/advantage_pre_scale_std": 0.09875391572713851, "signal/advantage_std": 0.09875391572713851, "signal/brier_reward/centered_abs_mean": 0.14277728796005248, "signal/brier_reward/group_bin_occupancy": 0.794140625, "signal/brier_reward/group_std_mean": 0.18211204409599305, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00626028710976243, "signal/brier_reward/weight": 0.043847785145044324, "signal/brier_reward/weighted_centered_abs_mean": 0.00626028710976243, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.003248645691201091, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6609375, "signal/frontier_aurc_reward/group_std_mean": 0.005463304091244936, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.921743770362809e-05, "signal/frontier_aurc_reward/weight": 0.021307827904820442, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.921743770362809e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2025707632303238, "signal/frontier_coverage_1/group_bin_occupancy": 0.8296875, "signal/frontier_coverage_1/group_std_mean": 0.2575278103351593, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00384969306178391, "signal/frontier_coverage_1/weight": 0.01900416538119316, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00384969306178391, "signal/frontier_coverage_10/centered_abs_mean": 0.2025707632303238, "signal/frontier_coverage_10/group_bin_occupancy": 0.8296875, "signal/frontier_coverage_10/group_std_mean": 0.2575278103351593, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00384969306178391, "signal/frontier_coverage_10/weight": 0.01900416538119316, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00384969306178391, "signal/frontier_coverage_15/centered_abs_mean": 0.2025707632303238, "signal/frontier_coverage_15/group_bin_occupancy": 0.8296875, "signal/frontier_coverage_15/group_std_mean": 0.2575278103351593, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00384969306178391, "signal/frontier_coverage_15/weight": 0.01900416538119316, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00384969306178391, "signal/frontier_coverage_20/centered_abs_mean": 0.1766321986913681, "signal/frontier_coverage_20/group_bin_occupancy": 0.82890625, "signal/frontier_coverage_20/group_std_mean": 0.2250331699848175, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003368807211518288, "signal/frontier_coverage_20/weight": 0.019072511792182924, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003368807211518288, "signal/frontier_coverage_25/centered_abs_mean": 0.10147839039564133, "signal/frontier_coverage_25/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_25/group_std_mean": 0.13023962080478668, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019708910025656224, "signal/frontier_coverage_25/weight": 0.019421060755848884, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019708910025656224, "signal/frontier_coverage_5/centered_abs_mean": 0.2025707632303238, "signal/frontier_coverage_5/group_bin_occupancy": 0.8296875, "signal/frontier_coverage_5/group_std_mean": 0.2575278103351593, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00384969306178391, "signal/frontier_coverage_5/weight": 0.01900416538119316, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00384969306178391, "signal/frontier_ece_reward/centered_abs_mean": 0.008110674936324358, "signal/frontier_ece_reward/group_bin_occupancy": 0.7265625, "signal/frontier_ece_reward/group_std_mean": 0.010212619230151177, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00119422294665128, "signal/frontier_ece_reward/weight": 0.14723817110061646, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00119422294665128, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10843141078948974, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.534375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.13856386244297028, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01880161128938198, "signal/frontier_entropy_batch_reward/weight": 0.17339597642421722, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01880161128938198, "step": 210 }, { "adaptive_ema/accuracy_reward": 0.46830501246117856, "adaptive_ema/brier_reward": 0.7211254637666132, "adaptive_ema/format_reward": 0.9560803170351629, "adaptive_ema/frontier_aurc_reward": 0.03958305982387139, "adaptive_ema/frontier_coverage_1": 0.1453660869004376, "adaptive_ema/frontier_coverage_10": 0.1453660869004376, "adaptive_ema/frontier_coverage_15": 0.1453660869004376, "adaptive_ema/frontier_coverage_20": 0.1413463773504267, "adaptive_ema/frontier_coverage_25": 0.124265991660699, "adaptive_ema/frontier_coverage_5": 0.1453660869004376, "adaptive_ema/frontier_ece_reward": 0.0495718913967398, "adaptive_ema/frontier_entropy_batch_reward": -0.11451944268600305, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.043339500576257704, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.0213736355304718, "adaptive_weight/frontier_coverage_1": 0.019019480049610137, "adaptive_weight/frontier_coverage_10": 0.019019480049610137, "adaptive_weight/frontier_coverage_15": 0.019019480049610137, "adaptive_weight/frontier_coverage_20": 0.019108938798308373, "adaptive_weight/frontier_coverage_25": 0.019489056989550592, "adaptive_weight/frontier_coverage_5": 0.019019480049610137, "adaptive_weight/frontier_ece_reward": 0.1477048873901367, "adaptive_weight/frontier_entropy_batch_reward": 0.17320606112480164, "calibration/aurc": 0.33968256897510185, "calibration/batch_distribution_entropy": 0.9683258088969448, "calibration/batch_entropy_100bins": 0.6988123005977697, "calibration/batch_entropy_10bins": 0.9683258088969448, "calibration/batch_entropy_50bins": 0.8007284420968486, "calibration/batch_uniqueness": 0.8925719595703125, "calibration/buffer_distribution_entropy": 0.9820925332574466, "calibration/buffer_entropy_100bins": 0.7173383975913089, "calibration/buffer_entropy_10bins": 0.9820925332574466, "calibration/buffer_entropy_50bins": 0.8165489914272855, "calibration/confidence_entropy": 0.48792837399947214, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.027734375, "calibration/coverage@15%": 0.14743124999999999, "calibration/coverage@20%": 0.26663125, "calibration/coverage@25%": 0.43825312499999997, "calibration/coverage@30%": 0.5667187499999999, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.149918890625, "calibration/mean_confidence": 0.454228140625, "calibration/prompt_uniqueness": 0.721783203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 586.0, "completions/max_terminated_length": 586.0, "completions/mean_length": 223.670703125, "completions/mean_terminated_length": 223.670703125, "completions/min_length": 104.2, "completions/min_terminated_length": 104.2, "epoch": 0.688, "grad_norm": 0.000957864336669445, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 732045726.0, "reward": 0.8029464364051819, "reward_std": 0.07481988817453385, "rewards/accuracy_reward": 0.53095703125, "rewards/brier_reward": 0.7833613395690918, "rewards/format_reward": 0.998828125, "rewards/frontier_aurc_reward": -0.0027452859794721007, "rewards/frontier_coverage_1": 0.116272484511137, "rewards/frontier_coverage_10": 0.116272484511137, "rewards/frontier_coverage_15": 0.116272484511137, "rewards/frontier_coverage_20": 0.09603669866919518, "rewards/frontier_coverage_25": 0.062441585958003996, "rewards/frontier_coverage_5": 0.116272484511137, "rewards/frontier_ece_reward": 0.005209988867864013, "rewards/frontier_entropy_batch_reward": -0.0494648601859808, "signal/accuracy_reward/centered_abs_mean": 0.102288818359375, "signal/accuracy_reward/group_bin_occupancy": 0.176171875, "signal/accuracy_reward/group_std_mean": 0.13836176693439484, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0511444091796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0511444091796875, "signal/advantage_abs_mean": 0.056684716045856474, "signal/advantage_pre_scale_abs_mean": 0.056684716045856474, "signal/advantage_pre_scale_std": 0.09830449968576431, "signal/advantage_std": 0.09830449968576431, "signal/brier_reward/centered_abs_mean": 0.13682132959365845, "signal/brier_reward/group_bin_occupancy": 0.8078125, "signal/brier_reward/group_std_mean": 0.1752842426300049, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005929951835423708, "signal/brier_reward/weight": 0.043339500576257704, "signal/brier_reward/weighted_centered_abs_mean": 0.005929951835423708, "signal/format_reward/centered_abs_mean": 0.00146484375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.001537091750651598, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000732421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000732421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025705496780574323, "signal/frontier_aurc_reward/group_bin_occupancy": 0.690234375, "signal/frontier_aurc_reward/group_std_mean": 0.004285382106900215, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.493503122124821e-05, "signal/frontier_aurc_reward/weight": 0.0213736355304718, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.493503122124821e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19760897159576415, "signal/frontier_coverage_1/group_bin_occupancy": 0.840625, "signal/frontier_coverage_1/group_std_mean": 0.25142764747142793, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037585416808724403, "signal/frontier_coverage_1/weight": 0.019019480049610137, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037585416808724403, "signal/frontier_coverage_10/centered_abs_mean": 0.19760897159576415, "signal/frontier_coverage_10/group_bin_occupancy": 0.840625, "signal/frontier_coverage_10/group_std_mean": 0.25142764747142793, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037585416808724403, "signal/frontier_coverage_10/weight": 0.019019480049610137, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037585416808724403, "signal/frontier_coverage_15/centered_abs_mean": 0.19760897159576415, "signal/frontier_coverage_15/group_bin_occupancy": 0.840625, "signal/frontier_coverage_15/group_std_mean": 0.25142764747142793, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037585416808724403, "signal/frontier_coverage_15/weight": 0.019019480049610137, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037585416808724403, "signal/frontier_coverage_20/centered_abs_mean": 0.1612505316734314, "signal/frontier_coverage_20/group_bin_occupancy": 0.83671875, "signal/frontier_coverage_20/group_std_mean": 0.20602917075157165, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030814073979854584, "signal/frontier_coverage_20/weight": 0.019108938798308373, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030814073979854584, "signal/frontier_coverage_25/centered_abs_mean": 0.09459872543811798, "signal/frontier_coverage_25/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_25/group_std_mean": 0.12124393731355668, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018436332466080785, "signal/frontier_coverage_25/weight": 0.019489056989550592, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018436332466080785, "signal/frontier_coverage_5/centered_abs_mean": 0.19760897159576415, "signal/frontier_coverage_5/group_bin_occupancy": 0.840625, "signal/frontier_coverage_5/group_std_mean": 0.25142764747142793, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037585416808724403, "signal/frontier_coverage_5/weight": 0.019019480049610137, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037585416808724403, "signal/frontier_ece_reward/centered_abs_mean": 0.007358336262404919, "signal/frontier_ece_reward/group_bin_occupancy": 0.72421875, "signal/frontier_ece_reward/group_std_mean": 0.009352485835552215, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010868682526051997, "signal/frontier_ece_reward/weight": 0.1477048873901367, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010868682526051997, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08006046563386918, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.512890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1021534651517868, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013866147492080927, "signal/frontier_entropy_batch_reward/weight": 0.17320606112480164, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013866147492080927, "step": 215 }, { "adaptive_ema/accuracy_reward": 0.4714226536770599, "adaptive_ema/brier_reward": 0.7245687330855672, "adaptive_ema/format_reward": 0.9582328185056775, "adaptive_ema/frontier_aurc_reward": 0.037501169886741494, "adaptive_ema/frontier_coverage_1": 0.1441238258614169, "adaptive_ema/frontier_coverage_10": 0.1441238258614169, "adaptive_ema/frontier_coverage_15": 0.1441238258614169, "adaptive_ema/frontier_coverage_20": 0.13939415817970868, "adaptive_ema/frontier_coverage_25": 0.12134546498309087, "adaptive_ema/frontier_coverage_5": 0.1441238258614169, "adaptive_ema/frontier_ece_reward": 0.047401946277577975, "adaptive_ema/frontier_entropy_batch_reward": -0.11144402296807562, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04283955916762352, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02143755294382572, "adaptive_weight/frontier_coverage_1": 0.019062766060233115, "adaptive_weight/frontier_coverage_10": 0.019062766060233115, "adaptive_weight/frontier_coverage_15": 0.019062766060233115, "adaptive_weight/frontier_coverage_20": 0.019168110191822053, "adaptive_weight/frontier_coverage_25": 0.019570105895400047, "adaptive_weight/frontier_coverage_5": 0.019062766060233115, "adaptive_weight/frontier_ece_reward": 0.14816365540027618, "adaptive_weight/frontier_entropy_batch_reward": 0.1728699505329132, "calibration/aurc": 0.2812223914615854, "calibration/batch_distribution_entropy": 0.9758433970044276, "calibration/batch_entropy_100bins": 0.6897804717683991, "calibration/batch_entropy_10bins": 0.9758433970044276, "calibration/batch_entropy_50bins": 0.7924209928664961, "calibration/batch_uniqueness": 0.8869873046875, "calibration/buffer_distribution_entropy": 0.9829288465120015, "calibration/buffer_entropy_100bins": 0.7186293500721178, "calibration/buffer_entropy_10bins": 0.9829288465120015, "calibration/buffer_entropy_50bins": 0.8175469891347094, "calibration/confidence_entropy": 0.46794092314030333, "calibration/coverage@0%": 0.0046875, "calibration/coverage@1%": 0.0046875, "calibration/coverage@10%": 0.044140625, "calibration/coverage@15%": 0.176953125, "calibration/coverage@20%": 0.25234375, "calibration/coverage@25%": 0.408984375, "calibration/coverage@30%": 0.62265625, "calibration/coverage@5%": 0.0046875, "calibration/ece": 0.11982812500000002, "calibration/mean_confidence": 0.496578125, "calibration/prompt_uniqueness": 0.688330078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 534.0, "completions/max_terminated_length": 534.0, "completions/mean_length": 224.83818359375, "completions/mean_terminated_length": 224.83818359375, "completions/min_length": 102.8, "completions/min_terminated_length": 102.8, "epoch": 0.704, "grad_norm": 0.0006922923494130373, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 749214213.0, "reward": 0.808210837841034, "reward_std": 0.06902927160263062, "rewards/accuracy_reward": 0.5416015625, "rewards/brier_reward": 0.7921440362930298, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0030134733766317367, "rewards/frontier_coverage_1": 0.1116558000445366, "rewards/frontier_coverage_10": 0.1116558000445366, "rewards/frontier_coverage_15": 0.1116558000445366, "rewards/frontier_coverage_20": 0.09597670137882233, "rewards/frontier_coverage_25": 0.06279801651835441, "rewards/frontier_coverage_5": 0.1116558000445366, "rewards/frontier_ece_reward": 0.005176219716668129, "rewards/frontier_entropy_batch_reward": -0.05135298371315002, "signal/accuracy_reward/centered_abs_mean": 0.08553466796875, "signal/accuracy_reward/group_bin_occupancy": 0.169921875, "signal/accuracy_reward/group_std_mean": 0.11762015819549561, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042767333984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042767333984375, "signal/advantage_abs_mean": 0.052474270761013034, "signal/advantage_pre_scale_abs_mean": 0.052474270761013034, "signal/advantage_pre_scale_std": 0.0917344942688942, "signal/advantage_std": 0.0917344942688942, "signal/brier_reward/centered_abs_mean": 0.13383509516716002, "signal/brier_reward/group_bin_occupancy": 0.8109375, "signal/brier_reward/group_std_mean": 0.17180217504501344, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005733540188521147, "signal/brier_reward/weight": 0.04283955916762352, "signal/brier_reward/weighted_centered_abs_mean": 0.005733540188521147, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028782275738194587, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6734375, "signal/frontier_aurc_reward/group_std_mean": 0.0045921245589852335, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.170250489958561e-05, "signal/frontier_aurc_reward/weight": 0.02143755294382572, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.170250489958561e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18142320215702057, "signal/frontier_coverage_1/group_bin_occupancy": 0.840234375, "signal/frontier_coverage_1/group_std_mean": 0.2332333356142044, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034584174398332836, "signal/frontier_coverage_1/weight": 0.019062766060233115, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034584174398332836, "signal/frontier_coverage_10/centered_abs_mean": 0.18142320215702057, "signal/frontier_coverage_10/group_bin_occupancy": 0.840234375, "signal/frontier_coverage_10/group_std_mean": 0.2332333356142044, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034584174398332836, "signal/frontier_coverage_10/weight": 0.019062766060233115, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034584174398332836, "signal/frontier_coverage_15/centered_abs_mean": 0.18142320215702057, "signal/frontier_coverage_15/group_bin_occupancy": 0.840234375, "signal/frontier_coverage_15/group_std_mean": 0.2332333356142044, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034584174398332836, "signal/frontier_coverage_15/weight": 0.019062766060233115, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034584174398332836, "signal/frontier_coverage_20/centered_abs_mean": 0.14190484583377838, "signal/frontier_coverage_20/group_bin_occupancy": 0.831640625, "signal/frontier_coverage_20/group_std_mean": 0.1834350824356079, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027199681848287583, "signal/frontier_coverage_20/weight": 0.019168110191822053, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027199681848287583, "signal/frontier_coverage_25/centered_abs_mean": 0.08425341546535492, "signal/frontier_coverage_25/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_25/group_std_mean": 0.1094443678855896, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016488022403791547, "signal/frontier_coverage_25/weight": 0.019570105895400047, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016488022403791547, "signal/frontier_coverage_5/centered_abs_mean": 0.18142320215702057, "signal/frontier_coverage_5/group_bin_occupancy": 0.840234375, "signal/frontier_coverage_5/group_std_mean": 0.2332333356142044, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034584174398332836, "signal/frontier_coverage_5/weight": 0.019062766060233115, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034584174398332836, "signal/frontier_ece_reward/centered_abs_mean": 0.007252698577940464, "signal/frontier_ece_reward/group_bin_occupancy": 0.70625, "signal/frontier_ece_reward/group_std_mean": 0.009230658039450645, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010745900450274347, "signal/frontier_ece_reward/weight": 0.14816365540027618, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010745900450274347, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07674806118011475, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09836492389440536, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013267523981630803, "signal/frontier_entropy_batch_reward/weight": 0.1728699505329132, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013267523981630803, "step": 220 }, { "adaptive_ema/accuracy_reward": 0.4757423695441325, "adaptive_ema/brier_reward": 0.7279110297441888, "adaptive_ema/format_reward": 0.9602750013781336, "adaptive_ema/frontier_aurc_reward": 0.03552184370867116, "adaptive_ema/frontier_coverage_1": 0.14200073041665678, "adaptive_ema/frontier_coverage_10": 0.14200073041665678, "adaptive_ema/frontier_coverage_15": 0.14200073041665678, "adaptive_ema/frontier_coverage_20": 0.13670227580543237, "adaptive_ema/frontier_coverage_25": 0.11821497614335949, "adaptive_ema/frontier_coverage_5": 0.14200073041665678, "adaptive_ema/frontier_ece_reward": 0.04532540390968216, "adaptive_ema/frontier_entropy_batch_reward": -0.10826526292922997, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04234748482704163, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021495748311281204, "adaptive_weight/frontier_coverage_1": 0.01912260763347149, "adaptive_weight/frontier_coverage_10": 0.01912260763347149, "adaptive_weight/frontier_coverage_15": 0.01912260763347149, "adaptive_weight/frontier_coverage_20": 0.019240697100758552, "adaptive_weight/frontier_coverage_25": 0.01965273208916187, "adaptive_weight/frontier_coverage_5": 0.01912260763347149, "adaptive_weight/frontier_ece_reward": 0.14858417212963104, "adaptive_weight/frontier_entropy_batch_reward": 0.172488734126091, "calibration/aurc": 0.26860360703296127, "calibration/batch_distribution_entropy": 0.9832870149703636, "calibration/batch_entropy_100bins": 0.7096184414476265, "calibration/batch_entropy_10bins": 0.9832870149703636, "calibration/batch_entropy_50bins": 0.8095542428846242, "calibration/batch_uniqueness": 0.9003417386613531, "calibration/buffer_distribution_entropy": 0.9841210799372659, "calibration/buffer_entropy_100bins": 0.7201560903265631, "calibration/buffer_entropy_10bins": 0.9841210799372659, "calibration/buffer_entropy_50bins": 0.8186714930627087, "calibration/confidence_entropy": 0.4777731746405983, "calibration/coverage@0%": 0.002734375, "calibration/coverage@1%": 0.002734375, "calibration/coverage@10%": 0.18984375, "calibration/coverage@15%": 0.24609375, "calibration/coverage@20%": 0.37578125, "calibration/coverage@25%": 0.55234375, "calibration/coverage@30%": 0.629296875, "calibration/coverage@5%": 0.1015625, "calibration/ece": 0.14447926576259787, "calibration/mean_confidence": 0.4992754890074608, "calibration/prompt_uniqueness": 0.7325255776047086, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 913.6, "completions/max_terminated_length": 521.0, "completions/mean_length": 223.48916015625, "completions/mean_terminated_length": 223.23304748535156, "completions/min_length": 101.4, "completions/min_terminated_length": 101.4, "epoch": 0.72, "grad_norm": 0.0009339398820884526, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 766512598.0, "reward": 0.8165707230567932, "reward_std": 0.07224588543176651, "rewards/accuracy_reward": 0.55751953125, "rewards/brier_reward": 0.7996157526969909, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002686341805383563, "rewards/frontier_coverage_1": 0.10848188325762749, "rewards/frontier_coverage_10": 0.10848188325762749, "rewards/frontier_coverage_15": 0.10848188325762749, "rewards/frontier_coverage_20": 0.09124723896384239, "rewards/frontier_coverage_25": 0.06009881421923637, "rewards/frontier_coverage_5": 0.10848188325762749, "rewards/frontier_ece_reward": 0.0052391432225704195, "rewards/frontier_entropy_batch_reward": -0.04627900570631027, "signal/accuracy_reward/centered_abs_mean": 0.093353271484375, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.12714605182409286, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0466766357421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0466766357421875, "signal/advantage_abs_mean": 0.05387095212936401, "signal/advantage_pre_scale_abs_mean": 0.05387095212936401, "signal/advantage_pre_scale_std": 0.0951445609331131, "signal/advantage_std": 0.0951445609331131, "signal/brier_reward/centered_abs_mean": 0.12789989858865738, "signal/brier_reward/group_bin_occupancy": 0.81640625, "signal/brier_reward/group_std_mean": 0.16612654328346252, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005415673460811376, "signal/brier_reward/weight": 0.04234748482704163, "signal/brier_reward/weighted_centered_abs_mean": 0.005415673460811376, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025467033963650467, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6765625, "signal/frontier_aurc_reward/group_std_mean": 0.004340066667646169, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4739906045142564e-05, "signal/frontier_aurc_reward/weight": 0.021495748311281204, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4739906045142564e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17897019982337953, "signal/frontier_coverage_1/group_bin_occupancy": 0.85, "signal/frontier_coverage_1/group_std_mean": 0.2304094761610031, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003422551741823554, "signal/frontier_coverage_1/weight": 0.01912260763347149, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003422551741823554, "signal/frontier_coverage_10/centered_abs_mean": 0.17897019982337953, "signal/frontier_coverage_10/group_bin_occupancy": 0.85, "signal/frontier_coverage_10/group_std_mean": 0.2304094761610031, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003422551741823554, "signal/frontier_coverage_10/weight": 0.01912260763347149, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003422551741823554, "signal/frontier_coverage_15/centered_abs_mean": 0.17897019982337953, "signal/frontier_coverage_15/group_bin_occupancy": 0.85, "signal/frontier_coverage_15/group_std_mean": 0.2304094761610031, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003422551741823554, "signal/frontier_coverage_15/weight": 0.01912260763347149, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003422551741823554, "signal/frontier_coverage_20/centered_abs_mean": 0.13409923911094665, "signal/frontier_coverage_20/group_bin_occupancy": 0.837109375, "signal/frontier_coverage_20/group_std_mean": 0.17386512756347655, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025803008582443, "signal/frontier_coverage_20/weight": 0.019240697100758552, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025803008582443, "signal/frontier_coverage_25/centered_abs_mean": 0.07745042741298676, "signal/frontier_coverage_25/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_25/group_std_mean": 0.10080204159021378, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015221974346786738, "signal/frontier_coverage_25/weight": 0.01965273208916187, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015221974346786738, "signal/frontier_coverage_5/centered_abs_mean": 0.17897019982337953, "signal/frontier_coverage_5/group_bin_occupancy": 0.85, "signal/frontier_coverage_5/group_std_mean": 0.2304094761610031, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003422551741823554, "signal/frontier_coverage_5/weight": 0.01912260763347149, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003422551741823554, "signal/frontier_ece_reward/centered_abs_mean": 0.007032676786184311, "signal/frontier_ece_reward/group_bin_occupancy": 0.715625, "signal/frontier_ece_reward/group_std_mean": 0.00889311209321022, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010449515888467432, "signal/frontier_ece_reward/weight": 0.14858417212963104, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010449515888467432, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07925714552402496, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.56171875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09801054894924163, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01367063745856285, "signal/frontier_entropy_batch_reward/weight": 0.172488734126091, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01367063745856285, "step": 225 }, { "adaptive_ema/accuracy_reward": 0.48034190133964305, "adaptive_ema/brier_reward": 0.7311690908832558, "adaptive_ema/format_reward": 0.9622152208058333, "adaptive_ema/frontier_aurc_reward": 0.03366201988413342, "adaptive_ema/frontier_coverage_1": 0.13972719591673705, "adaptive_ema/frontier_coverage_10": 0.13972719591673705, "adaptive_ema/frontier_coverage_15": 0.13972719591673705, "adaptive_ema/frontier_coverage_20": 0.13416882024286453, "adaptive_ema/frontier_coverage_25": 0.11526060077836695, "adaptive_ema/frontier_coverage_5": 0.13972719591673705, "adaptive_ema/frontier_ece_reward": 0.04333311199995612, "adaptive_ema/frontier_entropy_batch_reward": -0.10496182388959377, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04186927527189255, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02155204601585865, "adaptive_weight/frontier_coverage_1": 0.01918649598956108, "adaptive_weight/frontier_coverage_10": 0.01918649598956108, "adaptive_weight/frontier_coverage_15": 0.01918649598956108, "adaptive_weight/frontier_coverage_20": 0.019310463592410086, "adaptive_weight/frontier_coverage_25": 0.019732169806957245, "adaptive_weight/frontier_coverage_5": 0.01918649598956108, "adaptive_weight/frontier_ece_reward": 0.14899688959121704, "adaptive_weight/frontier_entropy_batch_reward": 0.17209317088127135, "calibration/aurc": 0.2774290384797605, "calibration/batch_distribution_entropy": 0.975751417418882, "calibration/batch_entropy_100bins": 0.7119109716006123, "calibration/batch_entropy_10bins": 0.975751417418882, "calibration/batch_entropy_50bins": 0.8102837692413288, "calibration/batch_uniqueness": 0.898162841796875, "calibration/buffer_distribution_entropy": 0.985121896835086, "calibration/buffer_entropy_100bins": 0.722076707653373, "calibration/buffer_entropy_10bins": 0.985121896835086, "calibration/buffer_entropy_50bins": 0.820123186724713, "calibration/confidence_entropy": 0.48087078575614656, "calibration/coverage@0%": 0.00390625, "calibration/coverage@1%": 0.00390625, "calibration/coverage@10%": 0.048828125, "calibration/coverage@15%": 0.210546875, "calibration/coverage@20%": 0.35078125, "calibration/coverage@25%": 0.488671875, "calibration/coverage@30%": 0.5984375, "calibration/coverage@5%": 0.00390625, "calibration/ece": 0.14718359374999998, "calibration/mean_confidence": 0.5199648437500001, "calibration/prompt_uniqueness": 0.725732421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 692.8, "completions/max_terminated_length": 487.6, "completions/mean_length": 221.00791015625, "completions/mean_terminated_length": 220.75122985839843, "completions/min_length": 101.4, "completions/min_terminated_length": 101.4, "epoch": 0.736, "grad_norm": 0.002505439566448331, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 783715303.0, "reward": 0.8198559522628784, "reward_std": 0.07011436522006989, "rewards/accuracy_reward": 0.56396484375, "rewards/brier_reward": 0.7870771884918213, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002653352613560855, "rewards/frontier_coverage_1": 0.09510880410671234, "rewards/frontier_coverage_10": 0.09510880410671234, "rewards/frontier_coverage_15": 0.09510880410671234, "rewards/frontier_coverage_20": 0.08357458561658859, "rewards/frontier_coverage_25": 0.05747109428048134, "rewards/frontier_coverage_5": 0.09510880410671234, "rewards/frontier_ece_reward": 0.004429191770032048, "rewards/frontier_entropy_batch_reward": -0.033060839772224425, "signal/accuracy_reward/centered_abs_mean": 0.092315673828125, "signal/accuracy_reward/group_bin_occupancy": 0.171484375, "signal/accuracy_reward/group_std_mean": 0.1249350905418396, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0461578369140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0461578369140625, "signal/advantage_abs_mean": 0.05301322415471077, "signal/advantage_pre_scale_abs_mean": 0.05301322415471077, "signal/advantage_pre_scale_std": 0.09370943009853364, "signal/advantage_std": 0.09370943009853364, "signal/brier_reward/centered_abs_mean": 0.13454246371984482, "signal/brier_reward/group_bin_occupancy": 0.8171875, "signal/brier_reward/group_std_mean": 0.17263633012771606, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0056332145817577835, "signal/brier_reward/weight": 0.04186927527189255, "signal/brier_reward/weighted_centered_abs_mean": 0.0056332145817577835, "signal/format_reward/centered_abs_mean": 0.0003662109375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.000768545875325799, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00018310546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025338149163872, "signal/frontier_aurc_reward/group_bin_occupancy": 0.671484375, "signal/frontier_aurc_reward/group_std_mean": 0.004274234082549811, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4615944827673955e-05, "signal/frontier_aurc_reward/weight": 0.02155204601585865, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4615944827673955e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1820545345544815, "signal/frontier_coverage_1/group_bin_occupancy": 0.837109375, "signal/frontier_coverage_1/group_std_mean": 0.23294652104377747, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034930593799799682, "signal/frontier_coverage_1/weight": 0.01918649598956108, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034930593799799682, "signal/frontier_coverage_10/centered_abs_mean": 0.1820545345544815, "signal/frontier_coverage_10/group_bin_occupancy": 0.837109375, "signal/frontier_coverage_10/group_std_mean": 0.23294652104377747, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034930593799799682, "signal/frontier_coverage_10/weight": 0.01918649598956108, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034930593799799682, "signal/frontier_coverage_15/centered_abs_mean": 0.1820545345544815, "signal/frontier_coverage_15/group_bin_occupancy": 0.837109375, "signal/frontier_coverage_15/group_std_mean": 0.23294652104377747, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034930593799799682, "signal/frontier_coverage_15/weight": 0.01918649598956108, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034930593799799682, "signal/frontier_coverage_20/centered_abs_mean": 0.13491614758968354, "signal/frontier_coverage_20/group_bin_occupancy": 0.825390625, "signal/frontier_coverage_20/group_std_mean": 0.17339130043983458, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026054022833704948, "signal/frontier_coverage_20/weight": 0.019310463592410086, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026054022833704948, "signal/frontier_coverage_25/centered_abs_mean": 0.07765587270259858, "signal/frontier_coverage_25/group_bin_occupancy": 0.8625, "signal/frontier_coverage_25/group_std_mean": 0.1002039521932602, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015323557192459703, "signal/frontier_coverage_25/weight": 0.019732169806957245, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015323557192459703, "signal/frontier_coverage_5/centered_abs_mean": 0.1820545345544815, "signal/frontier_coverage_5/group_bin_occupancy": 0.837109375, "signal/frontier_coverage_5/group_std_mean": 0.23294652104377747, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034930593799799682, "signal/frontier_coverage_5/weight": 0.01918649598956108, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034930593799799682, "signal/frontier_ece_reward/centered_abs_mean": 0.006834933627396822, "signal/frontier_ece_reward/group_bin_occupancy": 0.71484375, "signal/frontier_ece_reward/group_std_mean": 0.00876548495143652, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010184106649830937, "signal/frontier_ece_reward/weight": 0.14899688959121704, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010184106649830937, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06465991437435151, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.524609375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.08424876034259796, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011128241941332818, "signal/frontier_entropy_batch_reward/weight": 0.17209317088127135, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011128241941332818, "step": 230 }, { "adaptive_ema/accuracy_reward": 0.4846319086158449, "adaptive_ema/brier_reward": 0.7338147443526692, "adaptive_ema/format_reward": 0.9640594318178, "adaptive_ema/frontier_aurc_reward": 0.031872542136557096, "adaptive_ema/frontier_coverage_1": 0.1375201093728598, "adaptive_ema/frontier_coverage_10": 0.1375201093728598, "adaptive_ema/frontier_coverage_15": 0.1375201093728598, "adaptive_ema/frontier_coverage_20": 0.13135532068478054, "adaptive_ema/frontier_coverage_25": 0.11224977167595156, "adaptive_ema/frontier_coverage_5": 0.1375201093728598, "adaptive_ema/frontier_ece_reward": 0.0414305952665723, "adaptive_ema/frontier_entropy_batch_reward": -0.10191647726630064, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.041475728154182434, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02160160094499588, "adaptive_weight/frontier_coverage_1": 0.01924430951476097, "adaptive_weight/frontier_coverage_10": 0.01924430951476097, "adaptive_weight/frontier_coverage_15": 0.01924430951476097, "adaptive_weight/frontier_coverage_20": 0.019381864368915556, "adaptive_weight/frontier_coverage_25": 0.01980816349387169, "adaptive_weight/frontier_coverage_5": 0.01924430951476097, "adaptive_weight/frontier_ece_reward": 0.14935987889766694, "adaptive_weight/frontier_entropy_batch_reward": 0.17169552445411682, "calibration/aurc": 0.2963728152958468, "calibration/batch_distribution_entropy": 0.9633717819728546, "calibration/batch_entropy_100bins": 0.6890503821265225, "calibration/batch_entropy_10bins": 0.9633717819728546, "calibration/batch_entropy_50bins": 0.7882439531361687, "calibration/batch_uniqueness": 0.8822235107421875, "calibration/buffer_distribution_entropy": 0.9859966226633015, "calibration/buffer_entropy_100bins": 0.7234582418960763, "calibration/buffer_entropy_10bins": 0.9859966226633015, "calibration/buffer_entropy_50bins": 0.821087380334359, "calibration/confidence_entropy": 0.4473816058373828, "calibration/coverage@0%": 0.011328125, "calibration/coverage@1%": 0.011328125, "calibration/coverage@10%": 0.076171875, "calibration/coverage@15%": 0.18984375, "calibration/coverage@20%": 0.2984375, "calibration/coverage@25%": 0.4109375, "calibration/coverage@30%": 0.56875, "calibration/coverage@5%": 0.011328125, "calibration/ece": 0.12555078125000002, "calibration/mean_confidence": 0.45226953124999997, "calibration/prompt_uniqueness": 0.702001953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.6, "completions/max_terminated_length": 449.6, "completions/mean_length": 215.694140625, "completions/mean_terminated_length": 215.694140625, "completions/min_length": 102.4, "completions/min_terminated_length": 102.4, "epoch": 0.752, "grad_norm": 0.0009011356742121279, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 801151211.0, "reward": 0.8150582075119018, "reward_std": 0.0696170151233673, "rewards/accuracy_reward": 0.55830078125, "rewards/brier_reward": 0.791687285900116, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.003065359219908714, "rewards/frontier_coverage_1": 0.1088890254497528, "rewards/frontier_coverage_10": 0.1088890254497528, "rewards/frontier_coverage_15": 0.1088890254497528, "rewards/frontier_coverage_20": 0.08239309936761856, "rewards/frontier_coverage_25": 0.05571244210004807, "rewards/frontier_coverage_5": 0.1088890254497528, "rewards/frontier_ece_reward": 0.004906528582796455, "rewards/frontier_entropy_batch_reward": -0.05090191811323166, "signal/accuracy_reward/centered_abs_mean": 0.088250732421875, "signal/accuracy_reward/group_bin_occupancy": 0.16796875, "signal/accuracy_reward/group_std_mean": 0.11853147447109222, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441253662109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0441253662109375, "signal/advantage_abs_mean": 0.05262523740530014, "signal/advantage_pre_scale_abs_mean": 0.05262523740530014, "signal/advantage_pre_scale_std": 0.09439008533954621, "signal/advantage_std": 0.09439008533954621, "signal/brier_reward/centered_abs_mean": 0.12752344012260436, "signal/brier_reward/group_bin_occupancy": 0.80859375, "signal/brier_reward/group_std_mean": 0.16469366252422332, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005290476325899363, "signal/brier_reward/weight": 0.041475728154182434, "signal/brier_reward/weighted_centered_abs_mean": 0.005290476325899363, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002861019968986511, "signal/frontier_aurc_reward/group_bin_occupancy": 0.66484375, "signal/frontier_aurc_reward/group_std_mean": 0.00461784191429615, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.180902564665303e-05, "signal/frontier_aurc_reward/weight": 0.02160160094499588, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.180902564665303e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1756511449813843, "signal/frontier_coverage_1/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_1/group_std_mean": 0.2248201698064804, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033802258782088756, "signal/frontier_coverage_1/weight": 0.01924430951476097, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033802258782088756, "signal/frontier_coverage_10/centered_abs_mean": 0.1756511449813843, "signal/frontier_coverage_10/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_10/group_std_mean": 0.2248201698064804, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033802258782088756, "signal/frontier_coverage_10/weight": 0.01924430951476097, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033802258782088756, "signal/frontier_coverage_15/centered_abs_mean": 0.1756511449813843, "signal/frontier_coverage_15/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_15/group_std_mean": 0.2248201698064804, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033802258782088756, "signal/frontier_coverage_15/weight": 0.01924430951476097, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033802258782088756, "signal/frontier_coverage_20/centered_abs_mean": 0.12954683005809783, "signal/frontier_coverage_20/group_bin_occupancy": 0.8234375, "signal/frontier_coverage_20/group_std_mean": 0.16692675650119781, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002510684449225664, "signal/frontier_coverage_20/weight": 0.019381864368915556, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002510684449225664, "signal/frontier_coverage_25/centered_abs_mean": 0.07408646047115326, "signal/frontier_coverage_25/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_25/group_std_mean": 0.09557124227285385, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014674163889139892, "signal/frontier_coverage_25/weight": 0.01980816349387169, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014674163889139892, "signal/frontier_coverage_5/centered_abs_mean": 0.1756511449813843, "signal/frontier_coverage_5/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_5/group_std_mean": 0.2248201698064804, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033802258782088756, "signal/frontier_coverage_5/weight": 0.01924430951476097, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033802258782088756, "signal/frontier_ece_reward/centered_abs_mean": 0.006796565931290388, "signal/frontier_ece_reward/group_bin_occupancy": 0.709765625, "signal/frontier_ece_reward/group_std_mean": 0.00861854236572981, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010151177062653004, "signal/frontier_ece_reward/weight": 0.14935987889766694, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010151177062653004, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08200211226940154, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.54140625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10221528112888337, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01407961007207632, "signal/frontier_entropy_batch_reward/weight": 0.17169552445411682, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01407961007207632, "step": 235 }, { "adaptive_ema/accuracy_reward": 0.4862951098712302, "adaptive_ema/brier_reward": 0.7370724491017904, "adaptive_ema/format_reward": 0.9658179767826939, "adaptive_ema/frontier_aurc_reward": 0.030162166301981302, "adaptive_ema/frontier_coverage_1": 0.1377375083668309, "adaptive_ema/frontier_coverage_10": 0.1377375083668309, "adaptive_ema/frontier_coverage_15": 0.1377375083668309, "adaptive_ema/frontier_coverage_20": 0.13041548891361807, "adaptive_ema/frontier_coverage_25": 0.11005992251250514, "adaptive_ema/frontier_coverage_5": 0.1377375083668309, "adaptive_ema/frontier_ece_reward": 0.03967249820244284, "adaptive_ema/frontier_entropy_batch_reward": -0.10005694398465499, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.0410037562251091, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02165858820080757, "adaptive_weight/frontier_coverage_1": 0.019256194308400153, "adaptive_weight/frontier_coverage_10": 0.019256194308400153, "adaptive_weight/frontier_coverage_15": 0.019256194308400153, "adaptive_weight/frontier_coverage_20": 0.0194197129458189, "adaptive_weight/frontier_coverage_25": 0.019874298945069313, "adaptive_weight/frontier_coverage_5": 0.019256194308400153, "adaptive_weight/frontier_ece_reward": 0.14976398646831512, "adaptive_weight/frontier_entropy_batch_reward": 0.17155487537384034, "calibration/aurc": 0.2890387688744652, "calibration/batch_distribution_entropy": 0.9747367306286249, "calibration/batch_entropy_100bins": 0.7005247252075868, "calibration/batch_entropy_10bins": 0.9747367306286249, "calibration/batch_entropy_50bins": 0.8037471501457052, "calibration/batch_uniqueness": 0.896148681640625, "calibration/buffer_distribution_entropy": 0.9864059528029558, "calibration/buffer_entropy_100bins": 0.723835422585013, "calibration/buffer_entropy_10bins": 0.9864059528029558, "calibration/buffer_entropy_50bins": 0.8212176638152784, "calibration/confidence_entropy": 0.4836140466275616, "calibration/coverage@0%": 0.020703125, "calibration/coverage@1%": 0.020703125, "calibration/coverage@10%": 0.23359375, "calibration/coverage@15%": 0.274609375, "calibration/coverage@20%": 0.38359375, "calibration/coverage@25%": 0.43828125, "calibration/coverage@30%": 0.51171875, "calibration/coverage@5%": 0.152734375, "calibration/ece": 0.16974999999999998, "calibration/mean_confidence": 0.45348437500000005, "calibration/prompt_uniqueness": 0.75390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 472.0, "completions/max_terminated_length": 472.0, "completions/mean_length": 220.0798828125, "completions/mean_terminated_length": 220.0798828125, "completions/min_length": 95.4, "completions/min_terminated_length": 95.4, "epoch": 0.768, "grad_norm": 0.0007895245798863471, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 818337533.0, "reward": 0.7931086540222168, "reward_std": 0.07328220456838608, "rewards/accuracy_reward": 0.51162109375, "rewards/brier_reward": 0.8012859940528869, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002820562291890383, "rewards/frontier_coverage_1": 0.1454554319381714, "rewards/frontier_coverage_10": 0.1454554319381714, "rewards/frontier_coverage_15": 0.1454554319381714, "rewards/frontier_coverage_20": 0.11572380065917968, "rewards/frontier_coverage_25": 0.0690992683172226, "rewards/frontier_coverage_5": 0.1454554319381714, "rewards/frontier_ece_reward": 0.00546963894739747, "rewards/frontier_entropy_batch_reward": -0.06505972109735011, "signal/accuracy_reward/centered_abs_mean": 0.091497802734375, "signal/accuracy_reward/group_bin_occupancy": 0.16953125, "signal/accuracy_reward/group_std_mean": 0.12290232628583908, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0457489013671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0457489013671875, "signal/advantage_abs_mean": 0.0559655487537384, "signal/advantage_pre_scale_abs_mean": 0.0559655487537384, "signal/advantage_pre_scale_std": 0.09721155613660812, "signal/advantage_std": 0.09721155613660812, "signal/brier_reward/centered_abs_mean": 0.1295279458165169, "signal/brier_reward/group_bin_occupancy": 0.809765625, "signal/brier_reward/group_std_mean": 0.1659324049949646, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005311077646911144, "signal/brier_reward/weight": 0.0410037562251091, "signal/brier_reward/weighted_centered_abs_mean": 0.005311077646911144, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026746280957013367, "signal/frontier_aurc_reward/group_bin_occupancy": 0.673828125, "signal/frontier_aurc_reward/group_std_mean": 0.004395715426653624, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7932342315325513e-05, "signal/frontier_aurc_reward/weight": 0.02165858820080757, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7932342315325513e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18093566000461578, "signal/frontier_coverage_1/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_1/group_std_mean": 0.23029330968856812, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034840914886444807, "signal/frontier_coverage_1/weight": 0.019256194308400153, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034840914886444807, "signal/frontier_coverage_10/centered_abs_mean": 0.18093566000461578, "signal/frontier_coverage_10/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_10/group_std_mean": 0.23029330968856812, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034840914886444807, "signal/frontier_coverage_10/weight": 0.019256194308400153, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034840914886444807, "signal/frontier_coverage_15/centered_abs_mean": 0.18093566000461578, "signal/frontier_coverage_15/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_15/group_std_mean": 0.23029330968856812, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034840914886444807, "signal/frontier_coverage_15/weight": 0.019256194308400153, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034840914886444807, "signal/frontier_coverage_20/centered_abs_mean": 0.13377536982297897, "signal/frontier_coverage_20/group_bin_occupancy": 0.844140625, "signal/frontier_coverage_20/group_std_mean": 0.17080979347229003, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025977870915085076, "signal/frontier_coverage_20/weight": 0.0194197129458189, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025977870915085076, "signal/frontier_coverage_25/centered_abs_mean": 0.07524611651897431, "signal/frontier_coverage_25/group_bin_occupancy": 0.878515625, "signal/frontier_coverage_25/group_std_mean": 0.09601740390062333, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014954280573874711, "signal/frontier_coverage_25/weight": 0.019874298945069313, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014954280573874711, "signal/frontier_coverage_5/centered_abs_mean": 0.18093566000461578, "signal/frontier_coverage_5/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_5/group_std_mean": 0.23029330968856812, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034840914886444807, "signal/frontier_coverage_5/weight": 0.019256194308400153, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034840914886444807, "signal/frontier_ece_reward/centered_abs_mean": 0.00658888490870595, "signal/frontier_ece_reward/group_bin_occupancy": 0.721875, "signal/frontier_ece_reward/group_std_mean": 0.008340070582926273, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000986759876832366, "signal/frontier_ece_reward/weight": 0.14976398646831512, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000986759876832366, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09385951161384583, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.545703125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.11752689033746719, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01610282640904188, "signal/frontier_entropy_batch_reward/weight": 0.17155487537384034, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01610282640904188, "step": 240 }, { "adaptive_ema/accuracy_reward": 0.48961167774338754, "adaptive_ema/brier_reward": 0.7395506917762349, "adaptive_ema/format_reward": 0.967481739658895, "adaptive_ema/frontier_aurc_reward": 0.02854741451555499, "adaptive_ema/frontier_coverage_1": 0.1359473757242809, "adaptive_ema/frontier_coverage_10": 0.1359473757242809, "adaptive_ema/frontier_coverage_15": 0.1359473757242809, "adaptive_ema/frontier_coverage_20": 0.1279098750512469, "adaptive_ema/frontier_coverage_25": 0.10728999219979576, "adaptive_ema/frontier_coverage_5": 0.1359473757242809, "adaptive_ema/frontier_ece_reward": 0.037937546839066597, "adaptive_ema/frontier_entropy_batch_reward": -0.09750663975242849, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.040633540600538254, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021703314781188966, "adaptive_weight/frontier_coverage_1": 0.019303881376981736, "adaptive_weight/frontier_coverage_10": 0.019303881376981736, "adaptive_weight/frontier_coverage_15": 0.019303881376981736, "adaptive_weight/frontier_coverage_20": 0.019483447819948197, "adaptive_weight/frontier_coverage_25": 0.019944119080901147, "adaptive_weight/frontier_coverage_5": 0.019303881376981736, "adaptive_weight/frontier_ece_reward": 0.15009447634220124, "adaptive_weight/frontier_entropy_batch_reward": 0.17122556865215302, "calibration/aurc": 0.33736045830547284, "calibration/batch_distribution_entropy": 0.9776407299368929, "calibration/batch_entropy_100bins": 0.7138942156860587, "calibration/batch_entropy_10bins": 0.9776407299368929, "calibration/batch_entropy_50bins": 0.8104821318344806, "calibration/batch_uniqueness": 0.895302625944048, "calibration/buffer_distribution_entropy": 0.9869873126911267, "calibration/buffer_entropy_100bins": 0.7250785459370659, "calibration/buffer_entropy_10bins": 0.9869873126911267, "calibration/buffer_entropy_50bins": 0.8220233233940262, "calibration/confidence_entropy": 0.4486780646244971, "calibration/coverage@0%": 0.02813340875733855, "calibration/coverage@1%": 0.02813340875733855, "calibration/coverage@10%": 0.14844590875733857, "calibration/coverage@15%": 0.23293939579256362, "calibration/coverage@20%": 0.2794749877690802, "calibration/coverage@25%": 0.3134937622309198, "calibration/coverage@30%": 0.43190970523483363, "calibration/coverage@5%": 0.12110215875733857, "calibration/ece": 0.16516171110567518, "calibration/mean_confidence": 0.48722826259784735, "calibration/prompt_uniqueness": 0.7043301553557493, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1123.0, "completions/max_terminated_length": 609.0, "completions/mean_length": 223.16259765625, "completions/mean_terminated_length": 222.7779998779297, "completions/min_length": 105.4, "completions/min_terminated_length": 105.4, "epoch": 0.784, "grad_norm": 0.0007308553322218359, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 835797086.0, "reward": 0.8105384111404419, "reward_std": 0.07616954743862152, "rewards/accuracy_reward": 0.55625, "rewards/brier_reward": 0.7731986522674561, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.00297453529201448, "rewards/frontier_coverage_1": 0.08737537786364555, "rewards/frontier_coverage_10": 0.08737537786364555, "rewards/frontier_coverage_15": 0.08737537786364555, "rewards/frontier_coverage_20": 0.06674087345600128, "rewards/frontier_coverage_25": 0.04688483104109764, "rewards/frontier_coverage_5": 0.08737537786364555, "rewards/frontier_ece_reward": 0.003793169092386961, "rewards/frontier_entropy_batch_reward": -0.04895992577075958, "signal/accuracy_reward/centered_abs_mean": 0.1007080078125, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.13696602880954742, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05035400390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05035400390625, "signal/advantage_abs_mean": 0.057179969549179074, "signal/advantage_pre_scale_abs_mean": 0.057179969549179074, "signal/advantage_pre_scale_std": 0.09933086186647415, "signal/advantage_std": 0.09933086186647415, "signal/brier_reward/centered_abs_mean": 0.1394079804420471, "signal/brier_reward/group_bin_occupancy": 0.819921875, "signal/brier_reward/group_std_mean": 0.17940678894519807, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005664252396672964, "signal/brier_reward/weight": 0.040633540600538254, "signal/brier_reward/weighted_centered_abs_mean": 0.005664252396672964, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028895474504679443, "signal/frontier_aurc_reward/group_bin_occupancy": 0.673828125, "signal/frontier_aurc_reward/group_std_mean": 0.004819400142878294, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.271691308938898e-05, "signal/frontier_aurc_reward/weight": 0.021703314781188966, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.271691308938898e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18579940497875214, "signal/frontier_coverage_1/group_bin_occupancy": 0.84453125, "signal/frontier_coverage_1/group_std_mean": 0.24057165384292603, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035869122948497534, "signal/frontier_coverage_1/weight": 0.019303881376981736, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035869122948497534, "signal/frontier_coverage_10/centered_abs_mean": 0.18579940497875214, "signal/frontier_coverage_10/group_bin_occupancy": 0.84453125, "signal/frontier_coverage_10/group_std_mean": 0.24057165384292603, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035869122948497534, "signal/frontier_coverage_10/weight": 0.019303881376981736, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035869122948497534, "signal/frontier_coverage_15/centered_abs_mean": 0.18579940497875214, "signal/frontier_coverage_15/group_bin_occupancy": 0.84453125, "signal/frontier_coverage_15/group_std_mean": 0.24057165384292603, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035869122948497534, "signal/frontier_coverage_15/weight": 0.019303881376981736, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035869122948497534, "signal/frontier_coverage_20/centered_abs_mean": 0.12399939149618149, "signal/frontier_coverage_20/group_bin_occupancy": 0.836328125, "signal/frontier_coverage_20/group_std_mean": 0.16162908375263213, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002416027104482055, "signal/frontier_coverage_20/weight": 0.019483447819948197, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002416027104482055, "signal/frontier_coverage_25/centered_abs_mean": 0.07161483764648438, "signal/frontier_coverage_25/group_bin_occupancy": 0.884765625, "signal/frontier_coverage_25/group_std_mean": 0.09258138835430145, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001428340864367783, "signal/frontier_coverage_25/weight": 0.019944119080901147, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001428340864367783, "signal/frontier_coverage_5/centered_abs_mean": 0.18579940497875214, "signal/frontier_coverage_5/group_bin_occupancy": 0.84453125, "signal/frontier_coverage_5/group_std_mean": 0.24057165384292603, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035869122948497534, "signal/frontier_coverage_5/weight": 0.019303881376981736, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035869122948497534, "signal/frontier_ece_reward/centered_abs_mean": 0.006644812785089016, "signal/frontier_ece_reward/group_bin_occupancy": 0.7359375, "signal/frontier_ece_reward/group_std_mean": 0.00850515179336071, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009973590495064855, "signal/frontier_ece_reward/weight": 0.15009447634220124, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009973590495064855, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07034566476941109, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.541796875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.08720882162451744, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012043842859566212, "signal/frontier_entropy_batch_reward/weight": 0.17122556865215302, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012043842859566212, "step": 245 }, { "adaptive_ema/accuracy_reward": 0.49278542702458816, "adaptive_ema/brier_reward": 0.7413502303549515, "adaptive_ema/format_reward": 0.9690697337655709, "adaptive_ema/frontier_aurc_reward": 0.02698703182868343, "adaptive_ema/frontier_coverage_1": 0.13391419914220162, "adaptive_ema/frontier_coverage_10": 0.13391419914220162, "adaptive_ema/frontier_coverage_15": 0.1339120971969206, "adaptive_ema/frontier_coverage_20": 0.1251032662017515, "adaptive_ema/frontier_coverage_25": 0.10439676368024298, "adaptive_ema/frontier_coverage_5": 0.13391419914220162, "adaptive_ema/frontier_ece_reward": 0.0362690753632365, "adaptive_ema/frontier_entropy_batch_reward": -0.09606711071792928, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.040344792604446414, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021733880043029785, "adaptive_weight/frontier_coverage_1": 0.01934548281133175, "adaptive_weight/frontier_coverage_10": 0.01934548281133175, "adaptive_weight/frontier_coverage_15": 0.019345529749989508, "adaptive_weight/frontier_coverage_20": 0.019542290642857552, "adaptive_weight/frontier_coverage_25": 0.02000480554997921, "adaptive_weight/frontier_coverage_5": 0.01934548281133175, "adaptive_weight/frontier_ece_reward": 0.1503250777721405, "adaptive_weight/frontier_entropy_batch_reward": 0.1709671676158905, "calibration/aurc": 0.23386092363351443, "calibration/batch_distribution_entropy": 0.9667728581976904, "calibration/batch_entropy_100bins": 0.7156706087414253, "calibration/batch_entropy_10bins": 0.9667728581976904, "calibration/batch_entropy_50bins": 0.8090857984717517, "calibration/batch_uniqueness": 0.8946533203125, "calibration/buffer_distribution_entropy": 0.9875890271396454, "calibration/buffer_entropy_100bins": 0.7263351886603608, "calibration/buffer_entropy_10bins": 0.9875890271396454, "calibration/buffer_entropy_50bins": 0.8227308342238109, "calibration/confidence_entropy": 0.45589743198968674, "calibration/coverage@0%": 0.041796875, "calibration/coverage@1%": 0.041796875, "calibration/coverage@10%": 0.110546875, "calibration/coverage@15%": 0.399609375, "calibration/coverage@20%": 0.521484375, "calibration/coverage@25%": 0.60390625, "calibration/coverage@30%": 0.683984375, "calibration/coverage@5%": 0.041796875, "calibration/ece": 0.1378772265625, "calibration/mean_confidence": 0.46403683593749995, "calibration/prompt_uniqueness": 0.7259765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.6, "completions/max_terminated_length": 498.6, "completions/mean_length": 226.61142578125, "completions/mean_terminated_length": 226.61142578125, "completions/min_length": 109.4, "completions/min_terminated_length": 109.4, "epoch": 0.8, "grad_norm": 0.001016717404127121, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 853128147.0, "reward": 0.8214071750640869, "reward_std": 0.07228949069976806, "rewards/accuracy_reward": 0.58154296875, "rewards/brier_reward": 0.7869839787483215, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0029516459442675115, "rewards/frontier_coverage_1": 0.08761567920446396, "rewards/frontier_coverage_10": 0.08761567920446396, "rewards/frontier_coverage_15": 0.08751005381345749, "rewards/frontier_coverage_20": 0.06634584963321685, "rewards/frontier_coverage_25": 0.048831145837903024, "rewards/frontier_coverage_5": 0.08761567920446396, "rewards/frontier_ece_reward": 0.00398175586014986, "rewards/frontier_entropy_batch_reward": -0.062575813382864, "signal/accuracy_reward/centered_abs_mean": 0.096124267578125, "signal/accuracy_reward/group_bin_occupancy": 0.17109375, "signal/accuracy_reward/group_std_mean": 0.12790462523698806, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0480621337890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0480621337890625, "signal/advantage_abs_mean": 0.05569566786289215, "signal/advantage_pre_scale_abs_mean": 0.05569566786289215, "signal/advantage_pre_scale_std": 0.0975670725107193, "signal/advantage_std": 0.0975670725107193, "signal/brier_reward/centered_abs_mean": 0.12941146790981292, "signal/brier_reward/group_bin_occupancy": 0.819921875, "signal/brier_reward/group_std_mean": 0.16520380973815918, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005221651773899794, "signal/brier_reward/weight": 0.040344792604446414, "signal/brier_reward/weighted_centered_abs_mean": 0.005221651773899794, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027469987981021404, "signal/frontier_aurc_reward/group_bin_occupancy": 0.68671875, "signal/frontier_aurc_reward/group_std_mean": 0.004314049286767841, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.96989098994527e-05, "signal/frontier_aurc_reward/weight": 0.021733880043029785, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.96989098994527e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.178289532661438, "signal/frontier_coverage_1/group_bin_occupancy": 0.844140625, "signal/frontier_coverage_1/group_std_mean": 0.22912515997886657, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034491011407226323, "signal/frontier_coverage_1/weight": 0.01934548281133175, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034491011407226323, "signal/frontier_coverage_10/centered_abs_mean": 0.178289532661438, "signal/frontier_coverage_10/group_bin_occupancy": 0.844140625, "signal/frontier_coverage_10/group_std_mean": 0.22912515997886657, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034491011407226323, "signal/frontier_coverage_10/weight": 0.01934548281133175, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034491011407226323, "signal/frontier_coverage_15/centered_abs_mean": 0.1779107928276062, "signal/frontier_coverage_15/group_bin_occupancy": 0.84296875, "signal/frontier_coverage_15/group_std_mean": 0.22861688137054442, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034417792223393915, "signal/frontier_coverage_15/weight": 0.019345529749989508, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034417792223393915, "signal/frontier_coverage_20/centered_abs_mean": 0.11839482039213181, "signal/frontier_coverage_20/group_bin_occupancy": 0.8390625, "signal/frontier_coverage_20/group_std_mean": 0.15338994562625885, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002313756477087736, "signal/frontier_coverage_20/weight": 0.019542290642857552, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002313756477087736, "signal/frontier_coverage_25/centered_abs_mean": 0.0669538915157318, "signal/frontier_coverage_25/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_25/group_std_mean": 0.08613462299108506, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013393915724009275, "signal/frontier_coverage_25/weight": 0.02000480554997921, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013393915724009275, "signal/frontier_coverage_5/centered_abs_mean": 0.178289532661438, "signal/frontier_coverage_5/group_bin_occupancy": 0.844140625, "signal/frontier_coverage_5/group_std_mean": 0.22912515997886657, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034491011407226323, "signal/frontier_coverage_5/weight": 0.01934548281133175, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034491011407226323, "signal/frontier_ece_reward/centered_abs_mean": 0.0066254565492272375, "signal/frontier_ece_reward/group_bin_occupancy": 0.70234375, "signal/frontier_ece_reward/group_std_mean": 0.008377740532159806, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009959676070138812, "signal/frontier_ece_reward/weight": 0.1503250777721405, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009959676070138812, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08768114149570465, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.535546875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10930583029985427, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014992043934762478, "signal/frontier_entropy_batch_reward/weight": 0.1709671676158905, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014992043934762478, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4982370503843496, "eval_calibration/batch_distribution_entropy": 0.9075570584484239, "eval_calibration/batch_entropy_100bins": 0.5721649164690725, "eval_calibration/batch_entropy_10bins": 0.9075570584484239, "eval_calibration/batch_entropy_50bins": 0.6600130307272073, "eval_calibration/batch_uniqueness": 0.8154296875, "eval_calibration/buffer_distribution_entropy": 0.9877852855039699, "eval_calibration/buffer_entropy_100bins": 0.727217222230328, "eval_calibration/buffer_entropy_10bins": 0.9877852855039699, "eval_calibration/buffer_entropy_50bins": 0.8232864763440138, "eval_calibration/confidence_entropy": 0.4677585964356593, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.03125, "eval_calibration/coverage@15%": 0.1328125, "eval_calibration/coverage@20%": 0.1328125, "eval_calibration/coverage@25%": 0.140625, "eval_calibration/coverage@30%": 0.203125, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.169453125, "eval_calibration/mean_confidence": 0.41960937500000006, "eval_calibration/prompt_uniqueness": 0.8154296875, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 683.25, "eval_completions/max_terminated_length": 399.25, "eval_completions/mean_length": 233.7618522644043, "eval_completions/mean_terminated_length": 231.1970443725586, "eval_completions/min_length": 132.0, "eval_completions/min_terminated_length": 132.0, "eval_loss": 0.0, "eval_num_tokens": 853128147.0, "eval_reward": 0.7381985783576965, "eval_reward_std": 0.2331293225288391, "eval_rewards/accuracy_reward": 0.443359375, "eval_rewards/brier_reward": 0.792000949382782, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.0030543976463377476, "eval_rewards/frontier_coverage_1": 0.18230951577425003, "eval_rewards/frontier_coverage_10": 0.18230951577425003, "eval_rewards/frontier_coverage_15": 0.18006108701229095, "eval_rewards/frontier_coverage_20": 0.12108920887112617, "eval_rewards/frontier_coverage_25": 0.06418344844132662, "eval_rewards/frontier_coverage_5": 0.18230951577425003, "eval_rewards/frontier_ece_reward": 0.005655413027852774, "eval_rewards/frontier_entropy_batch_reward": -0.19229092076420784, "eval_runtime": 30.1689, "eval_samples_per_second": 16.573, "eval_signal/accuracy_reward/centered_abs_mean": 0.4793701171875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49709299951791763, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23968505859375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23968505859375, "eval_signal/advantage_abs_mean": 0.21164816245436668, "eval_signal/advantage_pre_scale_abs_mean": 0.21164816245436668, "eval_signal/advantage_pre_scale_std": 0.23054595291614532, "eval_signal/advantage_std": 0.23054595291614532, "eval_signal/brier_reward/centered_abs_mean": 0.18942880630493164, "eval_signal/brier_reward/group_bin_occupancy": 0.8125, "eval_signal/brier_reward/group_std_mean": 0.24447643756866455, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.007612279732711613, "eval_signal/brier_reward/weight": 0.040185440331697464, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.007612279732711613, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_bin_occupancy": 0.1328125, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0036958245909772813, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6796875, "eval_signal/frontier_aurc_reward/group_std_mean": 0.006954669952392578, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.03900147730019e-05, "eval_signal/frontier_aurc_reward/weight": 0.02175157703459263, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.03900147730019e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3917815089225769, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_1/group_std_mean": 0.4760092422366142, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007587432046420872, "eval_signal/frontier_coverage_1/weight": 0.019366487860679626, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007587432046420872, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3917815089225769, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_10/group_std_mean": 0.4760092422366142, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007587432046420872, "eval_signal/frontier_coverage_10/weight": 0.019366487860679626, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007587432046420872, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.38814665377140045, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_15/group_std_mean": 0.47172661870718, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007517083082348108, "eval_signal/frontier_coverage_15/weight": 0.019366605207324028, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007517083082348108, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.24452262371778488, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9453125, "eval_signal/frontier_coverage_20/group_std_mean": 0.30539170652627945, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0047855316661298275, "eval_signal/frontier_coverage_20/weight": 0.019570916891098022, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0047855316661298275, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.10838207229971886, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.90625, "eval_signal/frontier_coverage_25/group_std_mean": 0.14063885807991028, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021711059671361, "eval_signal/frontier_coverage_25/weight": 0.020031966269016266, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021711059671361, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3917815089225769, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_5/group_std_mean": 0.4760092422366142, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007587432046420872, "eval_signal/frontier_coverage_5/weight": 0.019366487860679626, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007587432046420872, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.010417576879262924, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.953125, "eval_signal/frontier_ece_reward/group_std_mean": 0.012703315122053027, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015673820744268596, "eval_signal/frontier_ece_reward/weight": 0.1504555344581604, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015673820744268596, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28658779338002205, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5625, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3573639839887619, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04896022565662861, "eval_signal/frontier_entropy_batch_reward/weight": 0.17083849012851715, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04896022565662861, "eval_steps_per_second": 0.133, "step": 250 }, { "adaptive_ema/accuracy_reward": 0.49717680829476807, "adaptive_ema/brier_reward": 0.7433735015687879, "adaptive_ema/format_reward": 0.9705789145165795, "adaptive_ema/frontier_aurc_reward": 0.025524089397445687, "adaptive_ema/frontier_coverage_1": 0.13136228238124845, "adaptive_ema/frontier_coverage_10": 0.13136228238124845, "adaptive_ema/frontier_coverage_15": 0.1313538576434497, "adaptive_ema/frontier_coverage_20": 0.12215141927429277, "adaptive_ema/frontier_coverage_25": 0.10166368874048068, "adaptive_ema/frontier_coverage_5": 0.13136228238124845, "adaptive_ema/frontier_ece_reward": 0.03467876901782855, "adaptive_ema/frontier_entropy_batch_reward": -0.09387897375412108, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.04003090411424637, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021767470613121988, "adaptive_weight/frontier_coverage_1": 0.01940329633653164, "adaptive_weight/frontier_coverage_10": 0.01940329633653164, "adaptive_weight/frontier_coverage_15": 0.019403484463691712, "adaptive_weight/frontier_coverage_20": 0.01960904449224472, "adaptive_weight/frontier_coverage_25": 0.02006669230759144, "adaptive_weight/frontier_coverage_5": 0.01940329633653164, "adaptive_weight/frontier_ece_reward": 0.15057945251464844, "adaptive_weight/frontier_entropy_batch_reward": 0.17063305974006654, "calibration/aurc": 0.2760196631448222, "calibration/batch_distribution_entropy": 0.9658467385414122, "calibration/batch_entropy_100bins": 0.731850484679636, "calibration/batch_entropy_10bins": 0.9658467385414122, "calibration/batch_entropy_50bins": 0.8224371996463742, "calibration/batch_uniqueness": 0.8936126708984375, "calibration/buffer_distribution_entropy": 0.9880820657401277, "calibration/buffer_entropy_100bins": 0.7282793861138731, "calibration/buffer_entropy_10bins": 0.9880820657401277, "calibration/buffer_entropy_50bins": 0.8240600011509466, "calibration/confidence_entropy": 0.45068693737141163, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.070703125, "calibration/coverage@15%": 0.14296875, "calibration/coverage@20%": 0.261328125, "calibration/coverage@25%": 0.344921875, "calibration/coverage@30%": 0.680078125, "calibration/coverage@5%": 0.059375, "calibration/ece": 0.1727737759491081, "calibration/mean_confidence": 0.48958559905089194, "calibration/prompt_uniqueness": 0.7189453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 708.8, "completions/max_terminated_length": 708.8, "completions/mean_length": 227.3673828125, "completions/mean_terminated_length": 227.3673828125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.816, "grad_norm": 0.0008266636286862195, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 870555557.0, "reward": 0.8199009537696839, "reward_std": 0.07451344430446624, "rewards/accuracy_reward": 0.57900390625, "rewards/brier_reward": 0.7676302909851074, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0031252800021320582, "rewards/frontier_coverage_1": 0.06562656462192536, "rewards/frontier_coverage_10": 0.06562656462192536, "rewards/frontier_coverage_15": 0.06530472338199615, "rewards/frontier_coverage_20": 0.05655328780412674, "rewards/frontier_coverage_25": 0.04255493320524693, "rewards/frontier_coverage_5": 0.06562656462192536, "rewards/frontier_ece_reward": 0.0030927245039492845, "rewards/frontier_entropy_batch_reward": -0.04541266113519669, "signal/accuracy_reward/centered_abs_mean": 0.103240966796875, "signal/accuracy_reward/group_bin_occupancy": 0.171484375, "signal/accuracy_reward/group_std_mean": 0.1336445689201355, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0516204833984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0516204833984375, "signal/advantage_abs_mean": 0.0583218663930893, "signal/advantage_pre_scale_abs_mean": 0.0583218663930893, "signal/advantage_pre_scale_std": 0.10045389086008072, "signal/advantage_std": 0.10045389086008072, "signal/brier_reward/centered_abs_mean": 0.14593898355960847, "signal/brier_reward/group_bin_occupancy": 0.830859375, "signal/brier_reward/group_std_mean": 0.18419291973114013, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0058418047614395615, "signal/brier_reward/weight": 0.04003090411424637, "signal/brier_reward/weighted_centered_abs_mean": 0.0058418047614395615, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031489692628383636, "signal/frontier_aurc_reward/group_bin_occupancy": 0.668359375, "signal/frontier_aurc_reward/group_std_mean": 0.00541405794210732, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.85463601257652e-05, "signal/frontier_aurc_reward/weight": 0.021767470613121988, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.85463601257652e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1957257032394409, "signal/frontier_coverage_1/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_1/group_std_mean": 0.24688530266284942, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037978495936840774, "signal/frontier_coverage_1/weight": 0.01940329633653164, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037978495936840774, "signal/frontier_coverage_10/centered_abs_mean": 0.1957257032394409, "signal/frontier_coverage_10/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_10/group_std_mean": 0.24688530266284942, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037978495936840774, "signal/frontier_coverage_10/weight": 0.01940329633653164, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037978495936840774, "signal/frontier_coverage_15/centered_abs_mean": 0.19446902275085448, "signal/frontier_coverage_15/group_bin_occupancy": 0.85078125, "signal/frontier_coverage_15/group_std_mean": 0.245290607213974, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003773502167314291, "signal/frontier_coverage_15/weight": 0.019403484463691712, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003773502167314291, "signal/frontier_coverage_20/centered_abs_mean": 0.1260778859257698, "signal/frontier_coverage_20/group_bin_occupancy": 0.848046875, "signal/frontier_coverage_20/group_std_mean": 0.1604294866323471, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024724855553358792, "signal/frontier_coverage_20/weight": 0.01960904449224472, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024724855553358792, "signal/frontier_coverage_25/centered_abs_mean": 0.07231247574090957, "signal/frontier_coverage_25/group_bin_occupancy": 0.896484375, "signal/frontier_coverage_25/group_std_mean": 0.0922024741768837, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014510968467220664, "signal/frontier_coverage_25/weight": 0.02006669230759144, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014510968467220664, "signal/frontier_coverage_5/centered_abs_mean": 0.1957257032394409, "signal/frontier_coverage_5/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_5/group_std_mean": 0.24688530266284942, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037978495936840774, "signal/frontier_coverage_5/weight": 0.01940329633653164, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037978495936840774, "signal/frontier_ece_reward/centered_abs_mean": 0.007009109575301408, "signal/frontier_ece_reward/group_bin_occupancy": 0.7265625, "signal/frontier_ece_reward/group_std_mean": 0.008824359998106957, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010554298525676132, "signal/frontier_ece_reward/weight": 0.15057945251464844, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010554298525676132, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06629724502563476, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.538671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.08260493278503418, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011315522249788046, "signal/frontier_entropy_batch_reward/weight": 0.17063305974006654, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011315522249788046, "step": 255 }, { "adaptive_ema/accuracy_reward": 0.5002342215898916, "adaptive_ema/brier_reward": 0.7453260418626751, "adaptive_ema/format_reward": 0.9720150777575703, "adaptive_ema/frontier_aurc_reward": 0.0241355303840874, "adaptive_ema/frontier_coverage_1": 0.12974147653299598, "adaptive_ema/frontier_coverage_10": 0.12974147653299598, "adaptive_ema/frontier_coverage_15": 0.12965426872532376, "adaptive_ema/frontier_coverage_20": 0.119753790365455, "adaptive_ema/frontier_coverage_25": 0.09915882452118516, "adaptive_ema/frontier_coverage_5": 0.12974147653299598, "adaptive_ema/frontier_ece_reward": 0.03317143645873037, "adaptive_ema/frontier_entropy_batch_reward": -0.09209263060264991, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.03973116055130958, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021801147237420083, "adaptive_weight/frontier_coverage_1": 0.01944187395274639, "adaptive_weight/frontier_coverage_10": 0.01944187395274639, "adaptive_weight/frontier_coverage_15": 0.019443822279572486, "adaptive_weight/frontier_coverage_20": 0.019665002822875977, "adaptive_weight/frontier_coverage_25": 0.020125101879239084, "adaptive_weight/frontier_coverage_5": 0.01944187395274639, "adaptive_weight/frontier_ece_reward": 0.15083298087120056, "adaptive_weight/frontier_entropy_batch_reward": 0.17037515938282013, "calibration/aurc": 0.31624097243406496, "calibration/batch_distribution_entropy": 0.9582681490821494, "calibration/batch_entropy_100bins": 0.7523830817628614, "calibration/batch_entropy_10bins": 0.9582681490821494, "calibration/batch_entropy_50bins": 0.8359059233335117, "calibration/batch_uniqueness": 0.8990996762118865, "calibration/buffer_distribution_entropy": 0.988342808364413, "calibration/buffer_entropy_100bins": 0.7313311553469075, "calibration/buffer_entropy_10bins": 0.988342808364413, "calibration/buffer_entropy_50bins": 0.8260630727799582, "calibration/confidence_entropy": 0.4638325415171778, "calibration/coverage@0%": 0.0203125, "calibration/coverage@1%": 0.0203125, "calibration/coverage@10%": 0.169140625, "calibration/coverage@15%": 0.19375, "calibration/coverage@20%": 0.25940251956947163, "calibration/coverage@25%": 0.3587940313111546, "calibration/coverage@30%": 0.49090784001956944, "calibration/coverage@5%": 0.132421875, "calibration/ece": 0.12444036356409001, "calibration/mean_confidence": 0.432767708842955, "calibration/prompt_uniqueness": 0.7441850834905047, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 912.6, "completions/max_terminated_length": 533.2, "completions/mean_length": 233.1248046875, "completions/mean_terminated_length": 232.87039489746093, "completions/min_length": 105.8, "completions/min_terminated_length": 105.8, "epoch": 0.832, "grad_norm": 0.0009671748848631978, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 887951107.0, "reward": 0.8058806896209717, "reward_std": 0.07332316190004348, "rewards/accuracy_reward": 0.54541015625, "rewards/brier_reward": 0.7977130174636841, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0025748027022928, "rewards/frontier_coverage_1": 0.12267700582742691, "rewards/frontier_coverage_10": 0.12267700582742691, "rewards/frontier_coverage_15": 0.12098241597414017, "rewards/frontier_coverage_20": 0.08454428240656853, "rewards/frontier_coverage_25": 0.05629109740257263, "rewards/frontier_coverage_5": 0.12267700582742691, "rewards/frontier_ece_reward": 0.004667013976722955, "rewards/frontier_entropy_batch_reward": -0.06709275171160697, "signal/accuracy_reward/centered_abs_mean": 0.100091552734375, "signal/accuracy_reward/group_bin_occupancy": 0.16875, "signal/accuracy_reward/group_std_mean": 0.1287603422999382, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0500457763671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0500457763671875, "signal/advantage_abs_mean": 0.05775661841034889, "signal/advantage_pre_scale_abs_mean": 0.05775661841034889, "signal/advantage_pre_scale_std": 0.10012289136648178, "signal/advantage_std": 0.10012289136648178, "signal/brier_reward/centered_abs_mean": 0.12413015365600585, "signal/brier_reward/group_bin_occupancy": 0.821875, "signal/brier_reward/group_std_mean": 0.1587320536375046, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004931798111647368, "signal/brier_reward/weight": 0.03973116055130958, "signal/brier_reward/weighted_centered_abs_mean": 0.004931798111647368, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024345603305846453, "signal/frontier_aurc_reward/group_bin_occupancy": 0.678125, "signal/frontier_aurc_reward/group_std_mean": 0.004217228572815657, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3077179472893476e-05, "signal/frontier_aurc_reward/weight": 0.021801147237420083, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3077179472893476e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17841730415821075, "signal/frontier_coverage_1/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_1/group_std_mean": 0.22835949063301086, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034687367733567953, "signal/frontier_coverage_1/weight": 0.01944187395274639, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034687367733567953, "signal/frontier_coverage_10/centered_abs_mean": 0.17841730415821075, "signal/frontier_coverage_10/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_10/group_std_mean": 0.22835949063301086, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034687367733567953, "signal/frontier_coverage_10/weight": 0.01944187395274639, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034687367733567953, "signal/frontier_coverage_15/centered_abs_mean": 0.17739293575286866, "signal/frontier_coverage_15/group_bin_occupancy": 0.855078125, "signal/frontier_coverage_15/group_std_mean": 0.22711012065410613, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00344918011687696, "signal/frontier_coverage_15/weight": 0.019443822279572486, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00344918011687696, "signal/frontier_coverage_20/centered_abs_mean": 0.11429814547300339, "signal/frontier_coverage_20/group_bin_occupancy": 0.842578125, "signal/frontier_coverage_20/group_std_mean": 0.14738841652870177, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00224763173609972, "signal/frontier_coverage_20/weight": 0.019665002822875977, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00224763173609972, "signal/frontier_coverage_25/centered_abs_mean": 0.06452373266220093, "signal/frontier_coverage_25/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_25/group_std_mean": 0.08209397196769715, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012985036475583911, "signal/frontier_coverage_25/weight": 0.020125101879239084, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012985036475583911, "signal/frontier_coverage_5/centered_abs_mean": 0.17841730415821075, "signal/frontier_coverage_5/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_5/group_std_mean": 0.22835949063301086, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034687367733567953, "signal/frontier_coverage_5/weight": 0.01944187395274639, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034687367733567953, "signal/frontier_ece_reward/centered_abs_mean": 0.006456979457288981, "signal/frontier_ece_reward/group_bin_occupancy": 0.683984375, "signal/frontier_ece_reward/group_std_mean": 0.008200454525649548, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009739116998389363, "signal/frontier_ece_reward/weight": 0.15083298087120056, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009739116998389363, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09052571952342987, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.556640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.11314128339290619, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015424954518675805, "signal/frontier_entropy_batch_reward/weight": 0.17037515938282013, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015424954518675805, "step": 260 }, { "adaptive_ema/accuracy_reward": 0.5019849432815855, "adaptive_ema/brier_reward": 0.7473832881442929, "adaptive_ema/format_reward": 0.9733741927078121, "adaptive_ema/frontier_aurc_reward": 0.022813292688545976, "adaptive_ema/frontier_coverage_1": 0.12871053705245333, "adaptive_ema/frontier_coverage_10": 0.12871053705245333, "adaptive_ema/frontier_coverage_15": 0.12861132513256696, "adaptive_ema/frontier_coverage_20": 0.11723889345214158, "adaptive_ema/frontier_coverage_25": 0.09663794196021616, "adaptive_ema/frontier_coverage_5": 0.12871053705245333, "adaptive_ema/frontier_ece_reward": 0.03175659418592736, "adaptive_ema/frontier_entropy_batch_reward": -0.08984249954012516, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.039427295327186584, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02184015288949013, "adaptive_weight/frontier_coverage_1": 0.019473345205187797, "adaptive_weight/frontier_coverage_10": 0.019473345205187797, "adaptive_weight/frontier_coverage_15": 0.019475562125444414, "adaptive_weight/frontier_coverage_20": 0.01972973793745041, "adaptive_weight/frontier_coverage_25": 0.020190170407295226, "adaptive_weight/frontier_coverage_5": 0.019473345205187797, "adaptive_weight/frontier_ece_reward": 0.15111920833587647, "adaptive_weight/frontier_entropy_batch_reward": 0.17009783387184144, "calibration/aurc": 0.3186903471152664, "calibration/batch_distribution_entropy": 0.9701558712644506, "calibration/batch_entropy_100bins": 0.7717989175152062, "calibration/batch_entropy_10bins": 0.9701558712644506, "calibration/batch_entropy_50bins": 0.8565927875400245, "calibration/batch_uniqueness": 0.9124298095703125, "calibration/buffer_distribution_entropy": 0.9888908768893276, "calibration/buffer_entropy_100bins": 0.7359311187257248, "calibration/buffer_entropy_10bins": 0.9888908768893276, "calibration/buffer_entropy_50bins": 0.8294183692710424, "calibration/confidence_entropy": 0.49147967100944756, "calibration/coverage@0%": 0.0203125, "calibration/coverage@1%": 0.0203125, "calibration/coverage@10%": 0.161328125, "calibration/coverage@15%": 0.262109375, "calibration/coverage@20%": 0.382421875, "calibration/coverage@25%": 0.489453125, "calibration/coverage@30%": 0.544921875, "calibration/coverage@5%": 0.10078125, "calibration/ece": 0.1589255205922068, "calibration/mean_confidence": 0.49733127628279317, "calibration/prompt_uniqueness": 0.769921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 784.2, "completions/max_terminated_length": 601.8, "completions/mean_length": 232.1328125, "completions/mean_terminated_length": 232.00542602539062, "completions/min_length": 111.8, "completions/min_terminated_length": 111.8, "epoch": 0.848, "grad_norm": 0.0008154821116477251, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 905342515.0, "reward": 0.8021765947341919, "reward_std": 0.07231762930750847, "rewards/accuracy_reward": 0.532421875, "rewards/brier_reward": 0.7882626295089722, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002877800026908517, "rewards/frontier_coverage_1": 0.11268590837717056, "rewards/frontier_coverage_10": 0.11268590837717056, "rewards/frontier_coverage_15": 0.11268590837717056, "rewards/frontier_coverage_20": 0.0692408874630928, "rewards/frontier_coverage_25": 0.04865131340920925, "rewards/frontier_coverage_5": 0.11268590837717056, "rewards/frontier_ece_reward": 0.004459475306794048, "rewards/frontier_entropy_batch_reward": -0.039942212775349616, "signal/accuracy_reward/centered_abs_mean": 0.09129638671875, "signal/accuracy_reward/group_bin_occupancy": 0.172265625, "signal/accuracy_reward/group_std_mean": 0.12554115206003189, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045648193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045648193359375, "signal/advantage_abs_mean": 0.053890705853700635, "signal/advantage_pre_scale_abs_mean": 0.053890705853700635, "signal/advantage_pre_scale_std": 0.09712902307510377, "signal/advantage_std": 0.09712902307510377, "signal/brier_reward/centered_abs_mean": 0.12816624343395233, "signal/brier_reward/group_bin_occupancy": 0.824609375, "signal/brier_reward/group_std_mean": 0.1650087833404541, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005053586699068547, "signal/brier_reward/weight": 0.039427295327186584, "signal/brier_reward/weighted_centered_abs_mean": 0.005053586699068547, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027573672123253345, "signal/frontier_aurc_reward/group_bin_occupancy": 0.667578125, "signal/frontier_aurc_reward/group_std_mean": 0.004844694584608078, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.021907247486524e-05, "signal/frontier_aurc_reward/weight": 0.02184015288949013, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.021907247486524e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17698953449726104, "signal/frontier_coverage_1/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_1/group_std_mean": 0.2280345469713211, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034466007724404334, "signal/frontier_coverage_1/weight": 0.019473345205187797, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034466007724404334, "signal/frontier_coverage_10/centered_abs_mean": 0.17698953449726104, "signal/frontier_coverage_10/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_10/group_std_mean": 0.2280345469713211, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034466007724404334, "signal/frontier_coverage_10/weight": 0.019473345205187797, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034466007724404334, "signal/frontier_coverage_15/centered_abs_mean": 0.17698953449726104, "signal/frontier_coverage_15/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_15/group_std_mean": 0.2280345469713211, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003446992952376604, "signal/frontier_coverage_15/weight": 0.019475562125444414, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003446992952376604, "signal/frontier_coverage_20/centered_abs_mean": 0.11172761619091034, "signal/frontier_coverage_20/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_20/group_std_mean": 0.14430533349514008, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022044140379875897, "signal/frontier_coverage_20/weight": 0.01972973793745041, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022044140379875897, "signal/frontier_coverage_25/centered_abs_mean": 0.06382529959082603, "signal/frontier_coverage_25/group_bin_occupancy": 0.888671875, "signal/frontier_coverage_25/group_std_mean": 0.08236846774816513, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001288694189861417, "signal/frontier_coverage_25/weight": 0.020190170407295226, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001288694189861417, "signal/frontier_coverage_5/centered_abs_mean": 0.17698953449726104, "signal/frontier_coverage_5/group_bin_occupancy": 0.85390625, "signal/frontier_coverage_5/group_std_mean": 0.2280345469713211, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034466007724404334, "signal/frontier_coverage_5/weight": 0.019473345205187797, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034466007724404334, "signal/frontier_ece_reward/centered_abs_mean": 0.006545740459114313, "signal/frontier_ece_reward/group_bin_occupancy": 0.701953125, "signal/frontier_ece_reward/group_std_mean": 0.008315418288111687, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000989195634610951, "signal/frontier_ece_reward/weight": 0.15111920833587647, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000989195634610951, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06969715096056461, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.51640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.0891161508858204, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01185646653175354, "signal/frontier_entropy_batch_reward/weight": 0.17009783387184144, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01185646653175354, "step": 265 }, { "adaptive_ema/accuracy_reward": 0.5044514748920346, "adaptive_ema/brier_reward": 0.7493093621849761, "adaptive_ema/format_reward": 0.9746733789377287, "adaptive_ema/frontier_aurc_reward": 0.021553571186420806, "adaptive_ema/frontier_coverage_1": 0.12737641092100635, "adaptive_ema/frontier_coverage_10": 0.12737641092100635, "adaptive_ema/frontier_coverage_15": 0.12728400434624482, "adaptive_ema/frontier_coverage_20": 0.1148155370049325, "adaptive_ema/frontier_coverage_25": 0.09428026537524757, "adaptive_ema/frontier_coverage_5": 0.12737641092100635, "adaptive_ema/frontier_ece_reward": 0.03040152289721601, "adaptive_ema/frontier_entropy_batch_reward": -0.08760520956054822, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.039141100645065305, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02187635935842991, "adaptive_weight/frontier_coverage_1": 0.019510345160961153, "adaptive_weight/frontier_coverage_10": 0.019510345160961153, "adaptive_weight/frontier_coverage_15": 0.01951241083443165, "adaptive_weight/frontier_coverage_20": 0.019791184365749358, "adaptive_weight/frontier_coverage_25": 0.020250317454338074, "adaptive_weight/frontier_coverage_5": 0.019510345160961153, "adaptive_weight/frontier_ece_reward": 0.15138642489910126, "adaptive_weight/frontier_entropy_batch_reward": 0.16981116831302642, "calibration/aurc": 0.31419108840785065, "calibration/batch_distribution_entropy": 0.9680592532098515, "calibration/batch_entropy_100bins": 0.776597155140324, "calibration/batch_entropy_10bins": 0.9680592532098515, "calibration/batch_entropy_50bins": 0.8547908572094611, "calibration/batch_uniqueness": 0.91705322265625, "calibration/buffer_distribution_entropy": 0.9894312794661053, "calibration/buffer_entropy_100bins": 0.7407619660570571, "calibration/buffer_entropy_10bins": 0.9894312794661053, "calibration/buffer_entropy_50bins": 0.833047557256501, "calibration/confidence_entropy": 0.4909109986565158, "calibration/coverage@0%": 0.002734375, "calibration/coverage@1%": 0.002734375, "calibration/coverage@10%": 0.00625, "calibration/coverage@15%": 0.070703125, "calibration/coverage@20%": 0.18828125, "calibration/coverage@25%": 0.34140625, "calibration/coverage@30%": 0.471484375, "calibration/coverage@5%": 0.002734375, "calibration/ece": 0.14773077485521877, "calibration/mean_confidence": 0.5650817251447812, "calibration/prompt_uniqueness": 0.786669921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 756.6, "completions/max_terminated_length": 570.8, "completions/mean_length": 235.30107421875, "completions/mean_terminated_length": 235.17388916015625, "completions/min_length": 114.6, "completions/min_terminated_length": 114.6, "epoch": 0.864, "grad_norm": 0.001250621397048235, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 922738814.0, "reward": 0.8196427822113037, "reward_std": 0.07560298591852188, "rewards/accuracy_reward": 0.577734375, "rewards/brier_reward": 0.782896625995636, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002946721948683262, "rewards/frontier_coverage_1": 0.07985682934522628, "rewards/frontier_coverage_10": 0.07985682934522628, "rewards/frontier_coverage_15": 0.07995446622371674, "rewards/frontier_coverage_20": 0.05727446302771568, "rewards/frontier_coverage_25": 0.04549731090664864, "rewards/frontier_coverage_5": 0.07985682934522628, "rewards/frontier_ece_reward": 0.003623440582305193, "rewards/frontier_entropy_batch_reward": -0.05079686343669891, "signal/accuracy_reward/centered_abs_mean": 0.10323486328125, "signal/accuracy_reward/group_bin_occupancy": 0.175390625, "signal/accuracy_reward/group_std_mean": 0.13858965933322906, "signal/accuracy_reward/group_zero_std_frac": 0.596875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051617431640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051617431640625, "signal/advantage_abs_mean": 0.056996123492717744, "signal/advantage_pre_scale_abs_mean": 0.056996123492717744, "signal/advantage_pre_scale_std": 0.100760318338871, "signal/advantage_std": 0.100760318338871, "signal/brier_reward/centered_abs_mean": 0.13495307713747023, "signal/brier_reward/group_bin_occupancy": 0.8296875, "signal/brier_reward/group_std_mean": 0.17243683338165283, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005282375589013099, "signal/brier_reward/weight": 0.039141100645065305, "signal/brier_reward/weighted_centered_abs_mean": 0.005282375589013099, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002958740387111902, "signal/frontier_aurc_reward/group_bin_occupancy": 0.670703125, "signal/frontier_aurc_reward/group_std_mean": 0.00494068767875433, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.472636378020979e-05, "signal/frontier_aurc_reward/weight": 0.02187635935842991, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.472636378020979e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18261632919311524, "signal/frontier_coverage_1/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_1/group_std_mean": 0.2336251974105835, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035628742538392545, "signal/frontier_coverage_1/weight": 0.019510345160961153, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035628742538392545, "signal/frontier_coverage_10/centered_abs_mean": 0.18261632919311524, "signal/frontier_coverage_10/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_10/group_std_mean": 0.2336251974105835, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035628742538392545, "signal/frontier_coverage_10/weight": 0.019510345160961153, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035628742538392545, "signal/frontier_coverage_15/centered_abs_mean": 0.1817552149295807, "signal/frontier_coverage_15/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_15/group_std_mean": 0.23247010409832, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003546440601348877, "signal/frontier_coverage_15/weight": 0.01951241083443165, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003546440601348877, "signal/frontier_coverage_20/centered_abs_mean": 0.11168777346611022, "signal/frontier_coverage_20/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_20/group_std_mean": 0.14396534562110902, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022104350849986077, "signal/frontier_coverage_20/weight": 0.019791184365749358, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022104350849986077, "signal/frontier_coverage_25/centered_abs_mean": 0.06536850556731225, "signal/frontier_coverage_25/group_bin_occupancy": 0.90234375, "signal/frontier_coverage_25/group_std_mean": 0.08358364701271057, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013237420003861188, "signal/frontier_coverage_25/weight": 0.020250317454338074, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013237420003861188, "signal/frontier_coverage_5/centered_abs_mean": 0.18261632919311524, "signal/frontier_coverage_5/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_5/group_std_mean": 0.2336251974105835, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035628742538392545, "signal/frontier_coverage_5/weight": 0.019510345160961153, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035628742538392545, "signal/frontier_ece_reward/centered_abs_mean": 0.0068363240920007225, "signal/frontier_ece_reward/group_bin_occupancy": 0.705078125, "signal/frontier_ece_reward/group_std_mean": 0.008612703718245029, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010349399410188199, "signal/frontier_ece_reward/weight": 0.15138642489910126, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010349399410188199, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07362207397818565, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.571484375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09382486641407013, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012501352466642857, "signal/frontier_entropy_batch_reward/weight": 0.16981116831302642, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012501352466642857, "step": 270 }, { "adaptive_ema/accuracy_reward": 0.505869124740359, "adaptive_ema/brier_reward": 0.751255920640763, "adaptive_ema/format_reward": 0.975910806373159, "adaptive_ema/frontier_aurc_reward": 0.02033559825264712, "adaptive_ema/frontier_coverage_1": 0.1269209463031713, "adaptive_ema/frontier_coverage_10": 0.1269209463031713, "adaptive_ema/frontier_coverage_15": 0.12677453549308487, "adaptive_ema/frontier_coverage_20": 0.11302953540707772, "adaptive_ema/frontier_coverage_25": 0.09220718762997454, "adaptive_ema/frontier_coverage_5": 0.1269209463031713, "adaptive_ema/frontier_ece_reward": 0.02913881739932035, "adaptive_ema/frontier_entropy_batch_reward": -0.08627040592132958, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.03884957581758499, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.02191058695316315, "adaptive_weight/frontier_coverage_1": 0.019526761770248414, "adaptive_weight/frontier_coverage_10": 0.019526761770248414, "adaptive_weight/frontier_coverage_15": 0.019530036300420762, "adaptive_weight/frontier_coverage_20": 0.019837450236082077, "adaptive_weight/frontier_coverage_25": 0.02030315063893795, "adaptive_weight/frontier_coverage_5": 0.019526761770248414, "adaptive_weight/frontier_ece_reward": 0.1516319841146469, "adaptive_weight/frontier_entropy_batch_reward": 0.1696569263935089, "calibration/aurc": 0.39134423975519905, "calibration/batch_distribution_entropy": 0.9757211224535725, "calibration/batch_entropy_100bins": 0.7925706641499016, "calibration/batch_entropy_10bins": 0.9757211224535725, "calibration/batch_entropy_50bins": 0.8665987159588019, "calibration/batch_uniqueness": 0.9180042159134094, "calibration/buffer_distribution_entropy": 0.9899841993361905, "calibration/buffer_entropy_100bins": 0.7451823056316547, "calibration/buffer_entropy_10bins": 0.9899841993361905, "calibration/buffer_entropy_50bins": 0.8362615117432455, "calibration/confidence_entropy": 0.4627085505313879, "calibration/coverage@0%": 0.0007827788649706457, "calibration/coverage@1%": 0.0007827788649706457, "calibration/coverage@10%": 0.0007827788649706457, "calibration/coverage@15%": 0.0469185726516634, "calibration/coverage@20%": 0.11843046722113501, "calibration/coverage@25%": 0.18492768468688844, "calibration/coverage@30%": 0.2803074547455969, "calibration/coverage@5%": 0.0007827788649706457, "calibration/ece": 0.13390239180033145, "calibration/mean_confidence": 0.4874716846651296, "calibration/prompt_uniqueness": 0.7678843550825962, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 733.2, "completions/max_terminated_length": 546.2, "completions/mean_length": 233.93681640625, "completions/mean_terminated_length": 233.81001892089844, "completions/min_length": 109.6, "completions/min_terminated_length": 109.6, "epoch": 0.88, "grad_norm": 0.0028608383145183325, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 940281399.0, "reward": 0.7891320705413818, "reward_std": 0.07504131197929383, "rewards/accuracy_reward": 0.5111328125, "rewards/brier_reward": 0.7911717891693115, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0033999960869550705, "rewards/frontier_coverage_1": 0.1350691318511963, "rewards/frontier_coverage_10": 0.1350691318511963, "rewards/frontier_coverage_15": 0.13321488201618195, "rewards/frontier_coverage_20": 0.08778582438826561, "rewards/frontier_coverage_25": 0.05375445336103439, "rewards/frontier_coverage_5": 0.1350691318511963, "rewards/frontier_ece_reward": 0.005027260864153504, "rewards/frontier_entropy_batch_reward": -0.06602781862020493, "signal/accuracy_reward/centered_abs_mean": 0.0984375, "signal/accuracy_reward/group_bin_occupancy": 0.17265625, "signal/accuracy_reward/group_std_mean": 0.13037826269865035, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04921875, "signal/advantage_abs_mean": 0.058286719024181366, "signal/advantage_pre_scale_abs_mean": 0.058286719024181366, "signal/advantage_pre_scale_std": 0.09950221627950669, "signal/advantage_std": 0.09950221627950669, "signal/brier_reward/centered_abs_mean": 0.131209397315979, "signal/brier_reward/group_bin_occupancy": 0.826171875, "signal/brier_reward/group_std_mean": 0.16899926364421844, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005097524542361498, "signal/brier_reward/weight": 0.03884957581758499, "signal/brier_reward/weighted_centered_abs_mean": 0.005097524542361498, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0033862961456179617, "signal/frontier_aurc_reward/group_bin_occupancy": 0.669140625, "signal/frontier_aurc_reward/group_std_mean": 0.005641693249344826, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.419235189445316e-05, "signal/frontier_aurc_reward/weight": 0.02191058695316315, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.419235189445316e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18316951990127564, "signal/frontier_coverage_1/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_1/group_std_mean": 0.23260467648506164, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003576703788712621, "signal/frontier_coverage_1/weight": 0.019526761770248414, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003576703788712621, "signal/frontier_coverage_10/centered_abs_mean": 0.18316951990127564, "signal/frontier_coverage_10/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_10/group_std_mean": 0.23260467648506164, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003576703788712621, "signal/frontier_coverage_10/weight": 0.019526761770248414, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003576703788712621, "signal/frontier_coverage_15/centered_abs_mean": 0.18001371920108794, "signal/frontier_coverage_15/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_15/group_std_mean": 0.22868903875350952, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035156650468707086, "signal/frontier_coverage_15/weight": 0.019530036300420762, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035156650468707086, "signal/frontier_coverage_20/centered_abs_mean": 0.11850059181451797, "signal/frontier_coverage_20/group_bin_occupancy": 0.861328125, "signal/frontier_coverage_20/group_std_mean": 0.15187331438064575, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00235068048350513, "signal/frontier_coverage_20/weight": 0.019837450236082077, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00235068048350513, "signal/frontier_coverage_25/centered_abs_mean": 0.06647183299064637, "signal/frontier_coverage_25/group_bin_occupancy": 0.89921875, "signal/frontier_coverage_25/group_std_mean": 0.08510075211524963, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013495617080479861, "signal/frontier_coverage_25/weight": 0.02030315063893795, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013495617080479861, "signal/frontier_coverage_5/centered_abs_mean": 0.18316951990127564, "signal/frontier_coverage_5/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_5/group_std_mean": 0.23260467648506164, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003576703788712621, "signal/frontier_coverage_5/weight": 0.019526761770248414, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003576703788712621, "signal/frontier_ece_reward/centered_abs_mean": 0.006699068006128073, "signal/frontier_ece_reward/group_bin_occupancy": 0.692578125, "signal/frontier_ece_reward/group_std_mean": 0.008414249680936337, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010157921817153693, "signal/frontier_ece_reward/weight": 0.1516319841146469, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010157921817153693, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08991494029760361, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.547265625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.11144240945577621, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015255115181207656, "signal/frontier_entropy_batch_reward/weight": 0.1696569263935089, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015255115181207656, "step": 275 }, { "adaptive_ema/accuracy_reward": 0.5073937566060608, "adaptive_ema/brier_reward": 0.7527572799637534, "adaptive_ema/format_reward": 0.9770818442948807, "adaptive_ema/frontier_aurc_reward": 0.019175632617939066, "adaptive_ema/frontier_coverage_1": 0.12594929285212791, "adaptive_ema/frontier_coverage_10": 0.12594929285212791, "adaptive_ema/frontier_coverage_15": 0.1257502319704197, "adaptive_ema/frontier_coverage_20": 0.11108417689443409, "adaptive_ema/frontier_coverage_25": 0.09012292847501352, "adaptive_ema/frontier_coverage_5": 0.12594929285212791, "adaptive_ema/frontier_ece_reward": 0.027905660536216792, "adaptive_ema/frontier_entropy_batch_reward": -0.08442867341245203, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.038624754548072814, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021942029893398284, "adaptive_weight/frontier_coverage_1": 0.019553394988179207, "adaptive_weight/frontier_coverage_10": 0.019553394988179207, "adaptive_weight/frontier_coverage_15": 0.019557848572731018, "adaptive_weight/frontier_coverage_20": 0.019885944575071333, "adaptive_weight/frontier_coverage_25": 0.020354868844151497, "adaptive_weight/frontier_coverage_5": 0.019553394988179207, "adaptive_weight/frontier_ece_reward": 0.15186265110969543, "adaptive_weight/frontier_entropy_batch_reward": 0.16941171288490295, "calibration/aurc": 0.3689805672848886, "calibration/batch_distribution_entropy": 0.9837880140435489, "calibration/batch_entropy_100bins": 0.8313778848884892, "calibration/batch_entropy_10bins": 0.9837880140435489, "calibration/batch_entropy_50bins": 0.8950411901827275, "calibration/batch_uniqueness": 0.9330657958984375, "calibration/buffer_distribution_entropy": 0.9904783408780954, "calibration/buffer_entropy_100bins": 0.7508931921521784, "calibration/buffer_entropy_10bins": 0.9904783408780954, "calibration/buffer_entropy_50bins": 0.840149593740642, "calibration/confidence_entropy": 0.4785073333743538, "calibration/coverage@0%": 0.011328125, "calibration/coverage@1%": 0.011328125, "calibration/coverage@10%": 0.047265625, "calibration/coverage@15%": 0.0625, "calibration/coverage@20%": 0.193359375, "calibration/coverage@25%": 0.3390625, "calibration/coverage@30%": 0.425390625, "calibration/coverage@5%": 0.029296875, "calibration/ece": 0.13935147315085533, "calibration/mean_confidence": 0.509082942248854, "calibration/prompt_uniqueness": 0.789599609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 955.0, "completions/max_terminated_length": 543.6, "completions/mean_length": 236.7720703125, "completions/mean_terminated_length": 236.51817321777344, "completions/min_length": 106.2, "completions/min_terminated_length": 106.2, "epoch": 0.896, "grad_norm": 0.0009539374732412398, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 957816793.0, "reward": 0.8073800921440124, "reward_std": 0.06512940153479577, "rewards/accuracy_reward": 0.54296875, "rewards/brier_reward": 0.7862501502037048, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002993757161311805, "rewards/frontier_coverage_1": 0.10751031935214997, "rewards/frontier_coverage_10": 0.10751031935214997, "rewards/frontier_coverage_15": 0.1055484026670456, "rewards/frontier_coverage_20": 0.07425087094306945, "rewards/frontier_coverage_25": 0.05060453489422798, "rewards/frontier_coverage_5": 0.10751031935214997, "rewards/frontier_ece_reward": 0.003828176483511925, "rewards/frontier_entropy_batch_reward": -0.03423706814646721, "signal/accuracy_reward/centered_abs_mean": 0.07969970703125, "signal/accuracy_reward/group_bin_occupancy": 0.16953125, "signal/accuracy_reward/group_std_mean": 0.11474124789237976, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039849853515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039849853515625, "signal/advantage_abs_mean": 0.04677758142352104, "signal/advantage_pre_scale_abs_mean": 0.04677758142352104, "signal/advantage_pre_scale_std": 0.08716107457876206, "signal/advantage_std": 0.08716107457876206, "signal/brier_reward/centered_abs_mean": 0.12659992277622223, "signal/brier_reward/group_bin_occupancy": 0.834765625, "signal/brier_reward/group_std_mean": 0.16378540694713592, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0048905015923082825, "signal/brier_reward/weight": 0.038624754548072814, "signal/brier_reward/weighted_centered_abs_mean": 0.0048905015923082825, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025218484457582234, "signal/frontier_aurc_reward/group_bin_occupancy": 0.669921875, "signal/frontier_aurc_reward/group_std_mean": 0.003992916271090508, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.532704090001061e-05, "signal/frontier_aurc_reward/weight": 0.021942029893398284, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.532704090001061e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17465949654579163, "signal/frontier_coverage_1/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_1/group_std_mean": 0.2252320319414139, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034151420928537845, "signal/frontier_coverage_1/weight": 0.019553394988179207, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034151420928537845, "signal/frontier_coverage_10/centered_abs_mean": 0.17465949654579163, "signal/frontier_coverage_10/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_10/group_std_mean": 0.2252320319414139, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034151420928537845, "signal/frontier_coverage_10/weight": 0.019553394988179207, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034151420928537845, "signal/frontier_coverage_15/centered_abs_mean": 0.1714522898197174, "signal/frontier_coverage_15/group_bin_occupancy": 0.859765625, "signal/frontier_coverage_15/group_std_mean": 0.22118231952190398, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033531927037984133, "signal/frontier_coverage_15/weight": 0.019557848572731018, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033531927037984133, "signal/frontier_coverage_20/centered_abs_mean": 0.11263496875762939, "signal/frontier_coverage_20/group_bin_occupancy": 0.860546875, "signal/frontier_coverage_20/group_std_mean": 0.1457061290740967, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00223980862647295, "signal/frontier_coverage_20/weight": 0.019885944575071333, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00223980862647295, "signal/frontier_coverage_25/centered_abs_mean": 0.06343068853020668, "signal/frontier_coverage_25/group_bin_occupancy": 0.894140625, "signal/frontier_coverage_25/group_std_mean": 0.08181680142879486, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012910918798297645, "signal/frontier_coverage_25/weight": 0.020354868844151497, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012910918798297645, "signal/frontier_coverage_5/centered_abs_mean": 0.17465949654579163, "signal/frontier_coverage_5/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_5/group_std_mean": 0.2252320319414139, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034151420928537845, "signal/frontier_coverage_5/weight": 0.019553394988179207, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034151420928537845, "signal/frontier_ece_reward/centered_abs_mean": 0.006351580470800399, "signal/frontier_ece_reward/group_bin_occupancy": 0.691796875, "signal/frontier_ece_reward/group_std_mean": 0.008096476551145315, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009645341080613435, "signal/frontier_ece_reward/weight": 0.15186265110969543, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009645341080613435, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.062499994039535524, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.584375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.07872299402952194, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01058815661817789, "signal/frontier_entropy_batch_reward/weight": 0.16941171288490295, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01058815661817789, "step": 280 }, { "adaptive_ema/accuracy_reward": 0.5092284077458263, "adaptive_ema/brier_reward": 0.7546247262433801, "adaptive_ema/format_reward": 0.978191525834433, "adaptive_ema/frontier_aurc_reward": 0.018114457775894748, "adaptive_ema/frontier_coverage_1": 0.12504303037808023, "adaptive_ema/frontier_coverage_10": 0.12504303037808023, "adaptive_ema/frontier_coverage_15": 0.12474519740835857, "adaptive_ema/frontier_coverage_20": 0.10942419283222868, "adaptive_ema/frontier_coverage_25": 0.08823268569686613, "adaptive_ema/frontier_coverage_5": 0.12504303037808023, "adaptive_ema/frontier_ece_reward": 0.02673011646802669, "adaptive_ema/frontier_entropy_batch_reward": -0.08223391461640496, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.038353316485881805, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021977385878562926, "adaptive_weight/frontier_coverage_1": 0.019584020227193834, "adaptive_weight/frontier_coverage_10": 0.019584020227193834, "adaptive_weight/frontier_coverage_15": 0.01959068663418293, "adaptive_weight/frontier_coverage_20": 0.019933614134788512, "adaptive_weight/frontier_coverage_25": 0.020407940819859506, "adaptive_weight/frontier_coverage_5": 0.019584020227193834, "adaptive_weight/frontier_ece_reward": 0.15212669968605042, "adaptive_weight/frontier_entropy_batch_reward": 0.16915828883647918, "calibration/aurc": 0.3819044789842734, "calibration/batch_distribution_entropy": 0.9745500386866468, "calibration/batch_entropy_100bins": 0.8460082275430061, "calibration/batch_entropy_10bins": 0.9745500386866468, "calibration/batch_entropy_50bins": 0.9023396987442025, "calibration/batch_uniqueness": 0.9325820998525798, "calibration/buffer_distribution_entropy": 0.9908606191108204, "calibration/buffer_entropy_100bins": 0.7591955423673246, "calibration/buffer_entropy_10bins": 0.9908606191108204, "calibration/buffer_entropy_50bins": 0.8460835465188536, "calibration/confidence_entropy": 0.4778993098135073, "calibration/coverage@0%": 0.016408543297455967, "calibration/coverage@1%": 0.016408543297455967, "calibration/coverage@10%": 0.057814793297455966, "calibration/coverage@15%": 0.09690710616438356, "calibration/coverage@20%": 0.14847572162426614, "calibration/coverage@25%": 0.27474162181996087, "calibration/coverage@30%": 0.3970485261741683, "calibration/coverage@5%": 0.05117416829745597, "calibration/ece": 0.12728103744166014, "calibration/mean_confidence": 0.4577738717529273, "calibration/prompt_uniqueness": 0.79909076035705, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 796.8, "completions/max_terminated_length": 651.6, "completions/mean_length": 241.9130859375, "completions/mean_terminated_length": 241.28506469726562, "completions/min_length": 109.6, "completions/min_terminated_length": 109.6, "epoch": 0.912, "grad_norm": 0.000874386983923614, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 975345279.0, "reward": 0.8047022581100464, "reward_std": 0.06991915851831436, "rewards/accuracy_reward": 0.54228515625, "rewards/brier_reward": 0.786166763305664, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.002516270847991109, "rewards/frontier_coverage_1": 0.10352101437747478, "rewards/frontier_coverage_10": 0.10352101437747478, "rewards/frontier_coverage_15": 0.10200221072882414, "rewards/frontier_coverage_20": 0.07823080904781818, "rewards/frontier_coverage_25": 0.051585903763771056, "rewards/frontier_coverage_5": 0.10352101437747478, "rewards/frontier_ece_reward": 0.0038088133092969655, "rewards/frontier_entropy_batch_reward": -0.04462176710367203, "signal/accuracy_reward/centered_abs_mean": 0.087933349609375, "signal/accuracy_reward/group_bin_occupancy": 0.169921875, "signal/accuracy_reward/group_std_mean": 0.1205834612250328, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0439666748046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0439666748046875, "signal/advantage_abs_mean": 0.052356043457984926, "signal/advantage_pre_scale_abs_mean": 0.052356043457984926, "signal/advantage_pre_scale_std": 0.09320106953382493, "signal/advantage_std": 0.09320106953382493, "signal/brier_reward/centered_abs_mean": 0.1347974494099617, "signal/brier_reward/group_bin_occupancy": 0.841015625, "signal/brier_reward/group_std_mean": 0.17360511124134065, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005169952008873224, "signal/brier_reward/weight": 0.038353316485881805, "signal/brier_reward/weighted_centered_abs_mean": 0.005169952008873224, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145630266517402, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023487197468057275, "signal/frontier_aurc_reward/group_bin_occupancy": 0.68046875, "signal/frontier_aurc_reward/group_std_mean": 0.004058520402759313, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1618300494737925e-05, "signal/frontier_aurc_reward/weight": 0.021977385878562926, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1618300494737925e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18932445645332335, "signal/frontier_coverage_1/group_bin_occupancy": 0.865625, "signal/frontier_coverage_1/group_std_mean": 0.24096913039684295, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037079003173857926, "signal/frontier_coverage_1/weight": 0.019584020227193834, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037079003173857926, "signal/frontier_coverage_10/centered_abs_mean": 0.18932445645332335, "signal/frontier_coverage_10/group_bin_occupancy": 0.865625, "signal/frontier_coverage_10/group_std_mean": 0.24096913039684295, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037079003173857926, "signal/frontier_coverage_10/weight": 0.019584020227193834, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037079003173857926, "signal/frontier_coverage_15/centered_abs_mean": 0.18330602943897248, "signal/frontier_coverage_15/group_bin_occupancy": 0.863671875, "signal/frontier_coverage_15/group_std_mean": 0.2332424372434616, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003591257706284523, "signal/frontier_coverage_15/weight": 0.01959068663418293, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003591257706284523, "signal/frontier_coverage_20/centered_abs_mean": 0.12120090126991272, "signal/frontier_coverage_20/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_20/group_std_mean": 0.15430061519145966, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002416037442162633, "signal/frontier_coverage_20/weight": 0.019933614134788512, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002416037442162633, "signal/frontier_coverage_25/centered_abs_mean": 0.07087931782007217, "signal/frontier_coverage_25/group_bin_occupancy": 0.9, "signal/frontier_coverage_25/group_std_mean": 0.09026498645544052, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014465219341218472, "signal/frontier_coverage_25/weight": 0.020407940819859506, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014465219341218472, "signal/frontier_coverage_5/centered_abs_mean": 0.18932445645332335, "signal/frontier_coverage_5/group_bin_occupancy": 0.865625, "signal/frontier_coverage_5/group_std_mean": 0.24096913039684295, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037079003173857926, "signal/frontier_coverage_5/weight": 0.019584020227193834, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037079003173857926, "signal/frontier_ece_reward/centered_abs_mean": 0.006613946333527565, "signal/frontier_ece_reward/group_bin_occupancy": 0.712109375, "signal/frontier_ece_reward/group_std_mean": 0.008411933667957783, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010061656357720494, "signal/frontier_ece_reward/weight": 0.15212669968605042, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010061656357720494, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07715009674429893, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.570703125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09899689108133317, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013051050342619419, "signal/frontier_entropy_batch_reward/weight": 0.16915828883647918, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013051050342619419, "step": 285 }, { "adaptive_ema/accuracy_reward": 0.5106151035471637, "adaptive_ema/brier_reward": 0.75579317795121, "adaptive_ema/format_reward": 0.9792288181548298, "adaptive_ema/frontier_aurc_reward": 0.0170897314251875, "adaptive_ema/frontier_coverage_1": 0.12426640506099074, "adaptive_ema/frontier_coverage_10": 0.12426640506099074, "adaptive_ema/frontier_coverage_15": 0.12390274216129707, "adaptive_ema/frontier_coverage_20": 0.1082497737327092, "adaptive_ema/frontier_coverage_25": 0.0865964311924109, "adaptive_ema/frontier_coverage_5": 0.12426640506099074, "adaptive_ema/frontier_ece_reward": 0.025613024390907325, "adaptive_ema/frontier_entropy_batch_reward": -0.08251692020997156, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.038155969232320786, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.021991834789514542, "adaptive_weight/frontier_coverage_1": 0.019593842327594757, "adaptive_weight/frontier_coverage_10": 0.019593842327594757, "adaptive_weight/frontier_coverage_15": 0.01960197910666466, "adaptive_weight/frontier_coverage_20": 0.01995220109820366, "adaptive_weight/frontier_coverage_25": 0.02043667696416378, "adaptive_weight/frontier_coverage_5": 0.019593842327594757, "adaptive_weight/frontier_ece_reward": 0.1522425502538681, "adaptive_weight/frontier_entropy_batch_reward": 0.16913725733757018, "calibration/aurc": 0.4009781099250545, "calibration/batch_distribution_entropy": 0.977108755039781, "calibration/batch_entropy_100bins": 0.8685464926262375, "calibration/batch_entropy_10bins": 0.977108755039781, "calibration/batch_entropy_50bins": 0.9190047463555822, "calibration/batch_uniqueness": 0.9368927001953125, "calibration/buffer_distribution_entropy": 0.9910334895593215, "calibration/buffer_entropy_100bins": 0.7698517366213622, "calibration/buffer_entropy_10bins": 0.9910334895593215, "calibration/buffer_entropy_50bins": 0.8536433225242437, "calibration/confidence_entropy": 0.4729662905175904, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.003515625, "calibration/coverage@15%": 0.003515625, "calibration/coverage@20%": 0.0203125, "calibration/coverage@25%": 0.168359375, "calibration/coverage@30%": 0.30625, "calibration/coverage@5%": 0.003515625, "calibration/ece": 0.12290451159765922, "calibration/mean_confidence": 0.46484572290423004, "calibration/prompt_uniqueness": 0.7974609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1149.0, "completions/max_terminated_length": 593.4, "completions/mean_length": 237.97587890625, "completions/mean_terminated_length": 237.46857604980468, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.928, "grad_norm": 0.0007995866471901536, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 992808968.0, "reward": 0.7858694791793823, "reward_std": 0.0720029890537262, "rewards/accuracy_reward": 0.5267578125, "rewards/brier_reward": 0.7718318223953247, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0032001886516809463, "rewards/frontier_coverage_1": 0.11377599537372589, "rewards/frontier_coverage_10": 0.11377599537372589, "rewards/frontier_coverage_15": 0.11207389831542969, "rewards/frontier_coverage_20": 0.08739523887634278, "rewards/frontier_coverage_25": 0.05544539391994476, "rewards/frontier_coverage_5": 0.11377599537372589, "rewards/frontier_ece_reward": 0.004172366205602884, "rewards/frontier_entropy_batch_reward": -0.11274452954530716, "signal/accuracy_reward/centered_abs_mean": 0.083251953125, "signal/accuracy_reward/group_bin_occupancy": 0.16796875, "signal/accuracy_reward/group_std_mean": 0.11550529301166534, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0416259765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0416259765625, "signal/advantage_abs_mean": 0.053884600102901456, "signal/advantage_pre_scale_abs_mean": 0.053884600102901456, "signal/advantage_pre_scale_std": 0.09473606795072556, "signal/advantage_std": 0.09473606795072556, "signal/brier_reward/centered_abs_mean": 0.1376921683549881, "signal/brier_reward/group_bin_occupancy": 0.82109375, "signal/brier_reward/group_std_mean": 0.17607857882976533, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.005253981985151768, "signal/brier_reward/weight": 0.038155969232320786, "signal/brier_reward/weighted_centered_abs_mean": 0.005253981985151768, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003057646518573165, "signal/frontier_aurc_reward/group_bin_occupancy": 0.66875, "signal/frontier_aurc_reward/group_std_mean": 0.005382006615400314, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.72438254696317e-05, "signal/frontier_aurc_reward/weight": 0.021991834789514542, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.72438254696317e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18453809320926667, "signal/frontier_coverage_1/group_bin_occupancy": 0.8546875, "signal/frontier_coverage_1/group_std_mean": 0.23699354827404023, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003615809418261051, "signal/frontier_coverage_1/weight": 0.019593842327594757, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003615809418261051, "signal/frontier_coverage_10/centered_abs_mean": 0.18453809320926667, "signal/frontier_coverage_10/group_bin_occupancy": 0.8546875, "signal/frontier_coverage_10/group_std_mean": 0.23699354827404023, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003615809418261051, "signal/frontier_coverage_10/weight": 0.019593842327594757, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003615809418261051, "signal/frontier_coverage_15/centered_abs_mean": 0.18022237420082093, "signal/frontier_coverage_15/group_bin_occupancy": 0.8546875, "signal/frontier_coverage_15/group_std_mean": 0.23157143592834473, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035327094607055186, "signal/frontier_coverage_15/weight": 0.01960197910666466, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035327094607055186, "signal/frontier_coverage_20/centered_abs_mean": 0.1192478209733963, "signal/frontier_coverage_20/group_bin_occupancy": 0.860546875, "signal/frontier_coverage_20/group_std_mean": 0.15341487228870393, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023792308289557695, "signal/frontier_coverage_20/weight": 0.01995220109820366, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023792308289557695, "signal/frontier_coverage_25/centered_abs_mean": 0.07185964584350586, "signal/frontier_coverage_25/group_bin_occupancy": 0.89140625, "signal/frontier_coverage_25/group_std_mean": 0.09106694906949997, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014685554197058082, "signal/frontier_coverage_25/weight": 0.02043667696416378, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014685554197058082, "signal/frontier_coverage_5/centered_abs_mean": 0.18453809320926667, "signal/frontier_coverage_5/group_bin_occupancy": 0.8546875, "signal/frontier_coverage_5/group_std_mean": 0.23699354827404023, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003615809418261051, "signal/frontier_coverage_5/weight": 0.019593842327594757, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003615809418261051, "signal/frontier_ece_reward/centered_abs_mean": 0.00682686697691679, "signal/frontier_ece_reward/group_bin_occupancy": 0.690625, "signal/frontier_ece_reward/group_std_mean": 0.008775676786899566, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010393401607871056, "signal/frontier_ece_reward/weight": 0.1522425502538681, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010393401607871056, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1246018260717392, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.496875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.15648339837789535, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02107503414154053, "signal/frontier_entropy_batch_reward/weight": 0.16913725733757018, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02107503414154053, "step": 290 }, { "adaptive_ema/accuracy_reward": 0.5108080126735147, "adaptive_ema/brier_reward": 0.7569582347148616, "adaptive_ema/format_reward": 0.9802267878664974, "adaptive_ema/frontier_aurc_reward": 0.016089259993850302, "adaptive_ema/frontier_coverage_1": 0.12413362836872002, "adaptive_ema/frontier_coverage_10": 0.12413362836872002, "adaptive_ema/frontier_coverage_15": 0.12361727269615874, "adaptive_ema/frontier_coverage_20": 0.10712179072242926, "adaptive_ema/frontier_coverage_25": 0.08498310727964524, "adaptive_ema/frontier_coverage_5": 0.12413362836872002, "adaptive_ema/frontier_ece_reward": 0.024585941797864795, "adaptive_ema/frontier_entropy_batch_reward": -0.08299495039063112, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.03796238005161286, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.022007527574896813, "adaptive_weight/frontier_coverage_1": 0.01959085576236248, "adaptive_weight/frontier_coverage_10": 0.01959085576236248, "adaptive_weight/frontier_coverage_15": 0.0196024052798748, "adaptive_weight/frontier_coverage_20": 0.019971366599202157, "adaptive_weight/frontier_coverage_25": 0.020466551557183264, "adaptive_weight/frontier_coverage_5": 0.01959085576236248, "adaptive_weight/frontier_ece_reward": 0.15235669910907745, "adaptive_weight/frontier_entropy_batch_reward": 0.16916049420833587, "calibration/aurc": 0.27811625685180363, "calibration/batch_distribution_entropy": 0.9834259313774243, "calibration/batch_entropy_100bins": 0.8790850388790513, "calibration/batch_entropy_10bins": 0.9834259313774243, "calibration/batch_entropy_50bins": 0.9269705754561457, "calibration/batch_uniqueness": 0.9436670570885285, "calibration/buffer_distribution_entropy": 0.99125029700843, "calibration/buffer_entropy_100bins": 0.7810425527134057, "calibration/buffer_entropy_10bins": 0.99125029700843, "calibration/buffer_entropy_50bins": 0.8617512833270945, "calibration/confidence_entropy": 0.4925551201594316, "calibration/coverage@0%": 0.017206610812133073, "calibration/coverage@1%": 0.017206610812133073, "calibration/coverage@10%": 0.09738716976516634, "calibration/coverage@15%": 0.2462993823385519, "calibration/coverage@20%": 0.3373822773972603, "calibration/coverage@25%": 0.45305620107632094, "calibration/coverage@30%": 0.5652030332681017, "calibration/coverage@5%": 0.05552990459882583, "calibration/ece": 0.10293797015478956, "calibration/mean_confidence": 0.46638448443790226, "calibration/prompt_uniqueness": 0.8167916924102497, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 926.4, "completions/max_terminated_length": 518.8, "completions/mean_length": 234.8603515625, "completions/mean_terminated_length": 234.6066162109375, "completions/min_length": 107.8, "completions/min_terminated_length": 107.8, "epoch": 0.944, "grad_norm": 0.0025131264701485634, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 1010189362.0, "reward": 0.7966355443000793, "reward_std": 0.08069588989019394, "rewards/accuracy_reward": 0.5318359375, "rewards/brier_reward": 0.7839723229408264, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0030146833509206773, "rewards/frontier_coverage_1": 0.11095014810562134, "rewards/frontier_coverage_10": 0.11095014810562134, "rewards/frontier_coverage_15": 0.10751423984766006, "rewards/frontier_coverage_20": 0.0784274235367775, "rewards/frontier_coverage_25": 0.05127616748213768, "rewards/frontier_coverage_5": 0.11095014810562134, "rewards/frontier_ece_reward": 0.004399744141846895, "rewards/frontier_entropy_batch_reward": -0.06369005087763072, "signal/accuracy_reward/centered_abs_mean": 0.113671875, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.14452168345451355, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0568359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0568359375, "signal/advantage_abs_mean": 0.06398204565048218, "signal/advantage_pre_scale_abs_mean": 0.06398204565048218, "signal/advantage_pre_scale_std": 0.10589756518602371, "signal/advantage_std": 0.10589756518602371, "signal/brier_reward/centered_abs_mean": 0.13136824518442153, "signal/brier_reward/group_bin_occupancy": 0.848046875, "signal/brier_reward/group_std_mean": 0.16815738677978515, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004987320583313704, "signal/brier_reward/weight": 0.03796238005161286, "signal/brier_reward/weighted_centered_abs_mean": 0.004987320583313704, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027758491691201926, "signal/frontier_aurc_reward/group_bin_occupancy": 0.68828125, "signal/frontier_aurc_reward/group_std_mean": 0.00486855860799551, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.108681991463527e-05, "signal/frontier_aurc_reward/weight": 0.022007527574896813, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.108681991463527e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19229323863983155, "signal/frontier_coverage_1/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_1/group_std_mean": 0.2426188260316849, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037671592552214863, "signal/frontier_coverage_1/weight": 0.01959085576236248, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037671592552214863, "signal/frontier_coverage_10/centered_abs_mean": 0.19229323863983155, "signal/frontier_coverage_10/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_10/group_std_mean": 0.2426188260316849, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037671592552214863, "signal/frontier_coverage_10/weight": 0.01959085576236248, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037671592552214863, "signal/frontier_coverage_15/centered_abs_mean": 0.18586346805095671, "signal/frontier_coverage_15/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_15/group_std_mean": 0.23480392396450042, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003643349697813392, "signal/frontier_coverage_15/weight": 0.0196024052798748, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003643349697813392, "signal/frontier_coverage_20/centered_abs_mean": 0.12204153388738632, "signal/frontier_coverage_20/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_20/group_std_mean": 0.15582017004489898, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024373249616473913, "signal/frontier_coverage_20/weight": 0.019971366599202157, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024373249616473913, "signal/frontier_coverage_25/centered_abs_mean": 0.06823899745941162, "signal/frontier_coverage_25/group_bin_occupancy": 0.916796875, "signal/frontier_coverage_25/group_std_mean": 0.08735780119895935, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013965919613838195, "signal/frontier_coverage_25/weight": 0.020466551557183264, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013965919613838195, "signal/frontier_coverage_5/centered_abs_mean": 0.19229323863983155, "signal/frontier_coverage_5/group_bin_occupancy": 0.86796875, "signal/frontier_coverage_5/group_std_mean": 0.2426188260316849, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037671592552214863, "signal/frontier_coverage_5/weight": 0.01959085576236248, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037671592552214863, "signal/frontier_ece_reward/centered_abs_mean": 0.007136920373886824, "signal/frontier_ece_reward/group_bin_occupancy": 0.7046875, "signal/frontier_ece_reward/group_std_mean": 0.009026623517274856, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010873484192416073, "signal/frontier_ece_reward/weight": 0.15235669910907745, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010873484192416073, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09417234137654304, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.52890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.12170673459768296, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015933521930128335, "signal/frontier_entropy_batch_reward/weight": 0.16916049420833587, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015933521930128335, "step": 295 }, { "adaptive_ema/accuracy_reward": 0.5122639748209438, "adaptive_ema/brier_reward": 0.7585752915825011, "adaptive_ema/format_reward": 0.9811795896968777, "adaptive_ema/frontier_aurc_reward": 0.015144469697940383, "adaptive_ema/frontier_coverage_1": 0.12333728149075274, "adaptive_ema/frontier_coverage_10": 0.12333728149075274, "adaptive_ema/frontier_coverage_15": 0.12278979277989002, "adaptive_ema/frontier_coverage_20": 0.10565099024087207, "adaptive_ema/frontier_coverage_25": 0.08325280611908174, "adaptive_ema/frontier_coverage_5": 0.12333728149075274, "adaptive_ema/frontier_ece_reward": 0.023597073161841908, "adaptive_ema/frontier_entropy_batch_reward": -0.08117773493254943, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.037726181000471114, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.022038231790065765, "adaptive_weight/frontier_coverage_1": 0.019617187976837158, "adaptive_weight/frontier_coverage_10": 0.019617187976837158, "adaptive_weight/frontier_coverage_15": 0.01962943859398365, "adaptive_weight/frontier_coverage_20": 0.020012956112623215, "adaptive_weight/frontier_coverage_25": 0.02051416300237179, "adaptive_weight/frontier_coverage_5": 0.019617187976837158, "adaptive_weight/frontier_ece_reward": 0.1525774270296097, "adaptive_weight/frontier_entropy_batch_reward": 0.16895003616809845, "calibration/aurc": 0.3313838553836209, "calibration/batch_distribution_entropy": 0.9878222102541848, "calibration/batch_entropy_100bins": 0.8772238237138197, "calibration/batch_entropy_10bins": 0.9878222102541848, "calibration/batch_entropy_50bins": 0.9273256058028748, "calibration/batch_uniqueness": 0.9473175048828125, "calibration/buffer_distribution_entropy": 0.9916506011747319, "calibration/buffer_entropy_100bins": 0.7920718405068625, "calibration/buffer_entropy_10bins": 0.9916506011747319, "calibration/buffer_entropy_50bins": 0.8694198189536589, "calibration/confidence_entropy": 0.48771411926965297, "calibration/coverage@0%": 0.004296875, "calibration/coverage@1%": 0.004296875, "calibration/coverage@10%": 0.141015625, "calibration/coverage@15%": 0.2109375, "calibration/coverage@20%": 0.27578125, "calibration/coverage@25%": 0.31171875, "calibration/coverage@30%": 0.366796875, "calibration/coverage@5%": 0.0875, "calibration/ece": 0.143712651508147, "calibration/mean_confidence": 0.5249439933399102, "calibration/prompt_uniqueness": 0.8244140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 957.2, "completions/max_terminated_length": 626.2, "completions/mean_length": 231.46904296875, "completions/mean_terminated_length": 231.2143524169922, "completions/min_length": 107.4, "completions/min_terminated_length": 107.4, "epoch": 0.96, "grad_norm": 0.0007890698034316301, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 1027499925.0, "reward": 0.7937455177307129, "reward_std": 0.06518236324191093, "rewards/accuracy_reward": 0.523046875, "rewards/brier_reward": 0.7962269902229309, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003563016327098012, "rewards/frontier_coverage_1": 0.12367903590202331, "rewards/frontier_coverage_10": 0.12367903590202331, "rewards/frontier_coverage_15": 0.12319278419017791, "rewards/frontier_coverage_20": 0.08755376040935517, "rewards/frontier_coverage_25": 0.05181429237127304, "rewards/frontier_coverage_5": 0.12367903590202331, "rewards/frontier_ece_reward": 0.004861411638557911, "rewards/frontier_entropy_batch_reward": -0.06445286944508552, "signal/accuracy_reward/centered_abs_mean": 0.07904052734375, "signal/accuracy_reward/group_bin_occupancy": 0.164453125, "signal/accuracy_reward/group_std_mean": 0.10704978257417679, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039520263671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039520263671875, "signal/advantage_abs_mean": 0.04916732534766197, "signal/advantage_pre_scale_abs_mean": 0.04916732534766197, "signal/advantage_pre_scale_std": 0.08988670557737351, "signal/advantage_std": 0.08988670557737351, "signal/brier_reward/centered_abs_mean": 0.11760072112083435, "signal/brier_reward/group_bin_occupancy": 0.8328125, "signal/brier_reward/group_std_mean": 0.15275218188762665, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00443740775808692, "signal/brier_reward/weight": 0.037726181000471114, "signal/brier_reward/weighted_centered_abs_mean": 0.00443740775808692, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032274942379444836, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7015625, "signal/frontier_aurc_reward/group_std_mean": 0.005160880694165826, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.112704624887556e-05, "signal/frontier_aurc_reward/weight": 0.022038231790065765, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.112704624887556e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15767935812473297, "signal/frontier_coverage_1/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_1/group_std_mean": 0.20363860130310057, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030932387802749873, "signal/frontier_coverage_1/weight": 0.019617187976837158, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030932387802749873, "signal/frontier_coverage_10/centered_abs_mean": 0.15767935812473297, "signal/frontier_coverage_10/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_10/group_std_mean": 0.20363860130310057, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030932387802749873, "signal/frontier_coverage_10/weight": 0.019617187976837158, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030932387802749873, "signal/frontier_coverage_15/centered_abs_mean": 0.1573574274778366, "signal/frontier_coverage_15/group_bin_occupancy": 0.8625, "signal/frontier_coverage_15/group_std_mean": 0.20323737263679503, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030888537876307964, "signal/frontier_coverage_15/weight": 0.01962943859398365, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030888537876307964, "signal/frontier_coverage_20/centered_abs_mean": 0.10249871462583542, "signal/frontier_coverage_20/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_20/group_std_mean": 0.13297524452209472, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002051279554143548, "signal/frontier_coverage_20/weight": 0.020012956112623215, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002051279554143548, "signal/frontier_coverage_25/centered_abs_mean": 0.061453332751989366, "signal/frontier_coverage_25/group_bin_occupancy": 0.916015625, "signal/frontier_coverage_25/group_std_mean": 0.07838500589132309, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012606421019881963, "signal/frontier_coverage_25/weight": 0.02051416300237179, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012606421019881963, "signal/frontier_coverage_5/centered_abs_mean": 0.15767935812473297, "signal/frontier_coverage_5/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_5/group_std_mean": 0.20363860130310057, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030932387802749873, "signal/frontier_coverage_5/weight": 0.019617187976837158, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030932387802749873, "signal/frontier_ece_reward/centered_abs_mean": 0.006616297829896212, "signal/frontier_ece_reward/group_bin_occupancy": 0.698046875, "signal/frontier_ece_reward/group_std_mean": 0.008402452990412711, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010094881290569901, "signal/frontier_ece_reward/weight": 0.1525774270296097, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010094881290569901, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08187509179115296, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.533984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10262143462896348, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013833227381110192, "signal/frontier_entropy_batch_reward/weight": 0.16895003616809845, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013833227381110192, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.428103398377754, "eval_calibration/batch_distribution_entropy": 0.9376296667868993, "eval_calibration/batch_entropy_100bins": 0.6705273382953512, "eval_calibration/batch_entropy_10bins": 0.9376296667868993, "eval_calibration/batch_entropy_50bins": 0.7588804645140499, "eval_calibration/batch_uniqueness": 0.8896484375, "eval_calibration/buffer_distribution_entropy": 0.9919862190492793, "eval_calibration/buffer_entropy_100bins": 0.7985249951460675, "eval_calibration/buffer_entropy_10bins": 0.9919862190492793, "eval_calibration/buffer_entropy_50bins": 0.8737942300811827, "eval_calibration/confidence_entropy": 0.4579045077229937, "eval_calibration/coverage@0%": 0.078125, "eval_calibration/coverage@1%": 0.078125, "eval_calibration/coverage@10%": 0.078125, "eval_calibration/coverage@15%": 0.078125, "eval_calibration/coverage@20%": 0.1015625, "eval_calibration/coverage@25%": 0.171875, "eval_calibration/coverage@30%": 0.328125, "eval_calibration/coverage@5%": 0.078125, "eval_calibration/ece": 0.21414062500000003, "eval_calibration/mean_confidence": 0.480234375, "eval_calibration/prompt_uniqueness": 0.8896484375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 430.5, "eval_completions/max_terminated_length": 430.5, "eval_completions/mean_length": 229.84199905395508, "eval_completions/mean_terminated_length": 229.84199905395508, "eval_completions/min_length": 131.75, "eval_completions/min_terminated_length": 131.75, "eval_loss": 0.0, "eval_num_tokens": 1027499925.0, "eval_reward": 0.7297923862934113, "eval_reward_std": 0.2275286726653576, "eval_rewards/accuracy_reward": 0.443359375, "eval_rewards/brier_reward": 0.7976783066987991, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0036502022994682193, "eval_rewards/frontier_coverage_1": 0.18035605922341347, "eval_rewards/frontier_coverage_10": 0.18035605922341347, "eval_rewards/frontier_coverage_15": 0.17741113528609276, "eval_rewards/frontier_coverage_20": 0.12365293130278587, "eval_rewards/frontier_coverage_25": 0.0637232419103384, "eval_rewards/frontier_coverage_5": 0.18035605922341347, "eval_rewards/frontier_ece_reward": 0.0058051192900165915, "eval_rewards/frontier_entropy_batch_reward": -0.24005889892578125, "eval_runtime": 22.5775, "eval_samples_per_second": 22.146, "eval_signal/accuracy_reward/centered_abs_mean": 0.4752197265625, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4949190020561218, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23760986328125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23760986328125, "eval_signal/advantage_abs_mean": 0.20552946254611015, "eval_signal/advantage_pre_scale_abs_mean": 0.20552946254611015, "eval_signal/advantage_pre_scale_std": 0.22503003850579262, "eval_signal/advantage_std": 0.22503003850579262, "eval_signal/brier_reward/centered_abs_mean": 0.1839733049273491, "eval_signal/brier_reward/group_bin_occupancy": 0.84375, "eval_signal/brier_reward/group_std_mean": 0.23671213164925575, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.006916819605976343, "eval_signal/brier_reward/weight": 0.037596866488456726, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.006916819605976343, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004666732216719538, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6484375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.009033310692757368, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010289873716828879, "eval_signal/frontier_aurc_reward/weight": 0.022049419581890106, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010289873716828879, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3463926389813423, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_1/group_std_mean": 0.42951615899801254, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006794996908865869, "eval_signal/frontier_coverage_1/weight": 0.01961645856499672, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006794996908865869, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3463926389813423, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_10/group_std_mean": 0.42951615899801254, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006794996908865869, "eval_signal/frontier_coverage_10/weight": 0.01961645856499672, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006794996908865869, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.34066376090049744, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.42321472615003586, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006686707376502454, "eval_signal/frontier_coverage_15/weight": 0.01962846703827381, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006686707376502454, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.21344707161188126, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_20/group_std_mean": 0.2740728035569191, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004273193306289613, "eval_signal/frontier_coverage_20/weight": 0.02001992054283619, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004273193306289613, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.09510924108326435, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.12545426562428474, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019525631796568632, "eval_signal/frontier_coverage_25/weight": 0.020529689267277718, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019525631796568632, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3463926389813423, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_5/group_std_mean": 0.42951615899801254, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006794996908865869, "eval_signal/frontier_coverage_5/weight": 0.01961645856499672, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006794996908865869, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.01034590182825923, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8828125, "eval_signal/frontier_ece_reward/group_std_mean": 0.01258331467397511, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001579352654516697, "eval_signal/frontier_ece_reward/weight": 0.15265490114688873, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001579352654516697, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3177356719970703, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.578125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.37068361788988113, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.053688227199018, "eval_signal/frontier_entropy_batch_reward/weight": 0.16897135972976685, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.053688227199018, "eval_steps_per_second": 0.177, "step": 300 }, { "adaptive_ema/accuracy_reward": 0.5130771176454476, "adaptive_ema/brier_reward": 0.7603371332806945, "adaptive_ema/format_reward": 0.9820866134055427, "adaptive_ema/frontier_aurc_reward": 0.014237143105888164, "adaptive_ema/frontier_coverage_1": 0.1230539148089607, "adaptive_ema/frontier_coverage_10": 0.1230539148089607, "adaptive_ema/frontier_coverage_15": 0.12254646138111085, "adaptive_ema/frontier_coverage_20": 0.1048816065193943, "adaptive_ema/frontier_coverage_25": 0.08169918055750292, "adaptive_ema/frontier_coverage_5": 0.1230539148089607, "adaptive_ema/frontier_ece_reward": 0.02266221011791273, "adaptive_ema/frontier_entropy_batch_reward": -0.0802780809085945, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.03746383413672447, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.022066167369484902, "adaptive_weight/frontier_coverage_1": 0.019630317389965058, "adaptive_weight/frontier_coverage_10": 0.019630317389965058, "adaptive_weight/frontier_coverage_15": 0.019641677290201186, "adaptive_weight/frontier_coverage_20": 0.020037102699279784, "adaptive_weight/frontier_coverage_25": 0.02055603824555874, "adaptive_weight/frontier_coverage_5": 0.019630317389965058, "adaptive_weight/frontier_ece_reward": 0.15277635753154756, "adaptive_weight/frontier_entropy_batch_reward": 0.16886787116527557, "calibration/aurc": 0.2831852439692231, "calibration/batch_distribution_entropy": 0.9723902640811122, "calibration/batch_entropy_100bins": 0.8672982440029516, "calibration/batch_entropy_10bins": 0.9723902640811122, "calibration/batch_entropy_50bins": 0.9170440971790507, "calibration/batch_uniqueness": 0.9440137046805166, "calibration/buffer_distribution_entropy": 0.9921364559876962, "calibration/buffer_entropy_100bins": 0.8027849052984001, "calibration/buffer_entropy_10bins": 0.9921364559876962, "calibration/buffer_entropy_50bins": 0.8766328649266398, "calibration/confidence_entropy": 0.5113814259674367, "calibration/coverage@0%": 0.01682751225490196, "calibration/coverage@1%": 0.01682751225490196, "calibration/coverage@10%": 0.0895435049019608, "calibration/coverage@15%": 0.2998468137254902, "calibration/coverage@20%": 0.45614276960784317, "calibration/coverage@25%": 0.5272732843137254, "calibration/coverage@30%": 0.5866973039215686, "calibration/coverage@5%": 0.03871476715686274, "calibration/ece": 0.15126782397040078, "calibration/mean_confidence": 0.4896648492617291, "calibration/prompt_uniqueness": 0.8269227916070498, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1167.0, "completions/max_terminated_length": 742.2, "completions/mean_length": 229.1451171875, "completions/mean_terminated_length": 228.6332275390625, "completions/min_length": 105.6, "completions/min_terminated_length": 105.6, "epoch": 0.976, "grad_norm": 0.001429693540558219, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 1044707491.0, "reward": 0.8066218614578247, "reward_std": 0.07456310316920281, "rewards/accuracy_reward": 0.55078125, "rewards/brier_reward": 0.7886098504066468, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0032080563250929117, "rewards/frontier_coverage_1": 0.09642277602106333, "rewards/frontier_coverage_10": 0.09642277602106333, "rewards/frontier_coverage_15": 0.0966072978451848, "rewards/frontier_coverage_20": 0.07433595582842827, "rewards/frontier_coverage_25": 0.04629525393247604, "rewards/frontier_coverage_5": 0.09642277602106333, "rewards/frontier_ece_reward": 0.003962028119713068, "rewards/frontier_entropy_batch_reward": -0.051515225879848005, "signal/accuracy_reward/centered_abs_mean": 0.0962158203125, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.12887984663248062, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04810791015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04810791015625, "signal/advantage_abs_mean": 0.05684085339307785, "signal/advantage_pre_scale_abs_mean": 0.05684085339307785, "signal/advantage_pre_scale_std": 0.09820054322481156, "signal/advantage_std": 0.09820054322481156, "signal/brier_reward/centered_abs_mean": 0.12333909422159195, "signal/brier_reward/group_bin_occupancy": 0.84453125, "signal/brier_reward/group_std_mean": 0.15947476029396057, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004620636254549027, "signal/brier_reward/weight": 0.03746383413672447, "signal/brier_reward/weighted_centered_abs_mean": 0.004620636254549027, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029812861699610948, "signal/frontier_aurc_reward/group_bin_occupancy": 0.68359375, "signal/frontier_aurc_reward/group_std_mean": 0.0049844134598970415, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.578704924322664e-05, "signal/frontier_aurc_reward/weight": 0.022066167369484902, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.578704924322664e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1713780403137207, "signal/frontier_coverage_1/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_1/group_std_mean": 0.2194644033908844, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003364281542599201, "signal/frontier_coverage_1/weight": 0.019630317389965058, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003364281542599201, "signal/frontier_coverage_10/centered_abs_mean": 0.1713780403137207, "signal/frontier_coverage_10/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_10/group_std_mean": 0.2194644033908844, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003364281542599201, "signal/frontier_coverage_10/weight": 0.019630317389965058, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003364281542599201, "signal/frontier_coverage_15/centered_abs_mean": 0.16927050352096557, "signal/frontier_coverage_15/group_bin_occupancy": 0.875, "signal/frontier_coverage_15/group_std_mean": 0.21687354445457457, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033248161897063257, "signal/frontier_coverage_15/weight": 0.019641677290201186, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033248161897063257, "signal/frontier_coverage_20/centered_abs_mean": 0.11072558313608169, "signal/frontier_coverage_20/group_bin_occupancy": 0.859765625, "signal/frontier_coverage_20/group_std_mean": 0.14288919419050217, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022185867186635734, "signal/frontier_coverage_20/weight": 0.020037102699279784, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022185867186635734, "signal/frontier_coverage_25/centered_abs_mean": 0.061330854147672656, "signal/frontier_coverage_25/group_bin_occupancy": 0.917578125, "signal/frontier_coverage_25/group_std_mean": 0.0784957006573677, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001260715490207076, "signal/frontier_coverage_25/weight": 0.02055603824555874, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001260715490207076, "signal/frontier_coverage_5/centered_abs_mean": 0.1713780403137207, "signal/frontier_coverage_5/group_bin_occupancy": 0.875390625, "signal/frontier_coverage_5/group_std_mean": 0.2194644033908844, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003364281542599201, "signal/frontier_coverage_5/weight": 0.019630317389965058, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003364281542599201, "signal/frontier_ece_reward/centered_abs_mean": 0.006669245660305023, "signal/frontier_ece_reward/group_bin_occupancy": 0.703125, "signal/frontier_ece_reward/group_std_mean": 0.008504109550267458, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010189112043008208, "signal/frontier_ece_reward/weight": 0.15277635753154756, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010189112043008208, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.08715428188443183, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.51796875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.10973945707082748, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014718150720000267, "signal/frontier_entropy_batch_reward/weight": 0.16886787116527557, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014718150720000267, "step": 305 }, { "adaptive_ema/accuracy_reward": 0.5144250010969874, "adaptive_ema/brier_reward": 0.7617638121707504, "adaptive_ema/format_reward": 0.9829559898913545, "adaptive_ema/frontier_aurc_reward": 0.013385174144901188, "adaptive_ema/frontier_coverage_1": 0.12253397598106337, "adaptive_ema/frontier_coverage_10": 0.12253397598106337, "adaptive_ema/frontier_coverage_15": 0.12198978612716108, "adaptive_ema/frontier_coverage_20": 0.10383560650446637, "adaptive_ema/frontier_coverage_25": 0.08025628350493652, "adaptive_ema/frontier_coverage_5": 0.12253397598106337, "adaptive_ema/frontier_ece_reward": 0.02175329515905596, "adaptive_ema/frontier_entropy_batch_reward": -0.0801542770711169, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.037239187955856325, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.022084273025393487, "adaptive_weight/frontier_coverage_1": 0.019641099125146867, "adaptive_weight/frontier_coverage_10": 0.019641099125146867, "adaptive_weight/frontier_coverage_15": 0.019653279706835745, "adaptive_weight/frontier_coverage_20": 0.02005964070558548, "adaptive_weight/frontier_coverage_25": 0.020587437599897385, "adaptive_weight/frontier_coverage_5": 0.019641099125146867, "adaptive_weight/frontier_ece_reward": 0.15291174948215486, "adaptive_weight/frontier_entropy_batch_reward": 0.16884112656116484, "calibration/aurc": 0.3806839677424766, "calibration/batch_distribution_entropy": 0.9715458108502546, "calibration/batch_entropy_100bins": 0.8537208499198261, "calibration/batch_entropy_10bins": 0.9715458108502546, "calibration/batch_entropy_50bins": 0.9048500226531984, "calibration/batch_uniqueness": 0.9303741455078125, "calibration/buffer_distribution_entropy": 0.9925188566651506, "calibration/buffer_entropy_100bins": 0.8121605905038793, "calibration/buffer_entropy_10bins": 0.9925188566651506, "calibration/buffer_entropy_50bins": 0.883147629124353, "calibration/confidence_entropy": 0.4781765688390339, "calibration/coverage@0%": 0.022265625, "calibration/coverage@1%": 0.022265625, "calibration/coverage@10%": 0.07578125, "calibration/coverage@15%": 0.104296875, "calibration/coverage@20%": 0.116015625, "calibration/coverage@25%": 0.15, "calibration/coverage@30%": 0.348046875, "calibration/coverage@5%": 0.04375, "calibration/ece": 0.13601852483138854, "calibration/mean_confidence": 0.4368255651329841, "calibration/prompt_uniqueness": 0.783056640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 709.0, "completions/max_terminated_length": 491.6, "completions/mean_length": 225.83916015625, "completions/mean_terminated_length": 225.71083068847656, "completions/min_length": 110.2, "completions/min_terminated_length": 110.2, "epoch": 0.992, "grad_norm": 0.002593899378553033, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 1062148564.0, "reward": 0.7919328927993774, "reward_std": 0.06939845085144043, "rewards/accuracy_reward": 0.5267578125, "rewards/brier_reward": 0.7912951111793518, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0029651729855686426, "rewards/frontier_coverage_1": 0.12737788558006286, "rewards/frontier_coverage_10": 0.12737788558006286, "rewards/frontier_coverage_15": 0.12562896311283112, "rewards/frontier_coverage_20": 0.09472279995679855, "rewards/frontier_coverage_25": 0.05767645165324211, "rewards/frontier_coverage_5": 0.12737788558006286, "rewards/frontier_ece_reward": 0.004302942892536521, "rewards/frontier_entropy_batch_reward": -0.08593676090240479, "signal/accuracy_reward/centered_abs_mean": 0.0891845703125, "signal/accuracy_reward/group_bin_occupancy": 0.164453125, "signal/accuracy_reward/group_std_mean": 0.11510567218065262, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04459228515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04459228515625, "signal/advantage_abs_mean": 0.05407559126615524, "signal/advantage_pre_scale_abs_mean": 0.05407559126615524, "signal/advantage_pre_scale_std": 0.09398611634969711, "signal/advantage_std": 0.09398611634969711, "signal/brier_reward/centered_abs_mean": 0.12326372712850571, "signal/brier_reward/group_bin_occupancy": 0.836328125, "signal/brier_reward/group_std_mean": 0.1573864758014679, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004590229969471693, "signal/brier_reward/weight": 0.037239187955856325, "signal/brier_reward/weighted_centered_abs_mean": 0.004590229969471693, "signal/format_reward/centered_abs_mean": 0.0003662109375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.000768545875325799, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00018310546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002825642959214747, "signal/frontier_aurc_reward/group_bin_occupancy": 0.68984375, "signal/frontier_aurc_reward/group_std_mean": 0.00474727526307106, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.240200782485772e-05, "signal/frontier_aurc_reward/weight": 0.022084273025393487, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.240200782485772e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17649457454681397, "signal/frontier_coverage_1/group_bin_occupancy": 0.868359375, "signal/frontier_coverage_1/group_std_mean": 0.22425515353679656, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034664351958781483, "signal/frontier_coverage_1/weight": 0.019641099125146867, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034664351958781483, "signal/frontier_coverage_10/centered_abs_mean": 0.17649457454681397, "signal/frontier_coverage_10/group_bin_occupancy": 0.868359375, "signal/frontier_coverage_10/group_std_mean": 0.22425515353679656, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034664351958781483, "signal/frontier_coverage_10/weight": 0.019641099125146867, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034664351958781483, "signal/frontier_coverage_15/centered_abs_mean": 0.17462271451950073, "signal/frontier_coverage_15/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_15/group_std_mean": 0.22184259593486785, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034317933954298497, "signal/frontier_coverage_15/weight": 0.019653279706835745, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034317933954298497, "signal/frontier_coverage_20/centered_abs_mean": 0.11235518455505371, "signal/frontier_coverage_20/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_20/group_std_mean": 0.14412140250205993, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022537729702889918, "signal/frontier_coverage_20/weight": 0.02005964070558548, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022537729702889918, "signal/frontier_coverage_25/centered_abs_mean": 0.06445452049374581, "signal/frontier_coverage_25/group_bin_occupancy": 0.908203125, "signal/frontier_coverage_25/group_std_mean": 0.08258575052022935, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013269589049741625, "signal/frontier_coverage_25/weight": 0.020587437599897385, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013269589049741625, "signal/frontier_coverage_5/centered_abs_mean": 0.17649457454681397, "signal/frontier_coverage_5/group_bin_occupancy": 0.868359375, "signal/frontier_coverage_5/group_std_mean": 0.22425515353679656, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034664351958781483, "signal/frontier_coverage_5/weight": 0.019641099125146867, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034664351958781483, "signal/frontier_ece_reward/centered_abs_mean": 0.006545371748507023, "signal/frontier_ece_reward/group_bin_occupancy": 0.694921875, "signal/frontier_ece_reward/group_std_mean": 0.008297445718199015, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010008613695390522, "signal/frontier_ece_reward/weight": 0.15291174948215486, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010008613695390522, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10476705580949783, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.50859375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.1305326849222183, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017689590714871882, "signal/frontier_entropy_batch_reward/weight": 0.16884112656116484, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017689590714871882, "step": 310 }, { "adaptive_ema/accuracy_reward": 0.5154819198209648, "adaptive_ema/brier_reward": 0.7622238970963693, "adaptive_ema/format_reward": 0.9835315309777346, "adaptive_ema/frontier_aurc_reward": 0.012814884519278785, "adaptive_ema/frontier_coverage_1": 0.12143998690674097, "adaptive_ema/frontier_coverage_10": 0.12143998690674097, "adaptive_ema/frontier_coverage_15": 0.12086318699406459, "adaptive_ema/frontier_coverage_20": 0.10259387455745778, "adaptive_ema/frontier_coverage_25": 0.07906396950440622, "adaptive_ema/frontier_coverage_5": 0.12143998690674097, "adaptive_ema/frontier_ece_reward": 0.021118504741223176, "adaptive_ema/frontier_entropy_batch_reward": -0.07954886454452184, "adaptive_weight/accuracy_reward": 0.5, "adaptive_weight/brier_reward": 0.03715994581580162, "adaptive_weight/format_reward": 0.5, "adaptive_weight/frontier_aurc_reward": 0.022092683240771294, "adaptive_weight/frontier_coverage_1": 0.01966171059757471, "adaptive_weight/frontier_coverage_10": 0.01966171059757471, "adaptive_weight/frontier_coverage_15": 0.019674619659781456, "adaptive_weight/frontier_coverage_20": 0.020083477720618248, "adaptive_weight/frontier_coverage_25": 0.020610064268112183, "adaptive_weight/frontier_coverage_5": 0.01966171059757471, "adaptive_weight/frontier_ece_reward": 0.15298082679510117, "adaptive_weight/frontier_entropy_batch_reward": 0.1687132492661476, "calibration/aurc": 0.3092712618845156, "calibration/batch_distribution_entropy": 0.9647997895562282, "calibration/batch_entropy_100bins": 0.842838417523208, "calibration/batch_entropy_10bins": 0.9647997895562282, "calibration/batch_entropy_50bins": 0.900232393020596, "calibration/batch_uniqueness": 0.9379348754882812, "calibration/buffer_distribution_entropy": 0.992895519242504, "calibration/buffer_entropy_100bins": 0.8180349692082165, "calibration/buffer_entropy_10bins": 0.992895519242504, "calibration/buffer_entropy_50bins": 0.887059539018632, "calibration/confidence_entropy": 0.48557998461300034, "calibration/coverage@0%": 0.017578125, "calibration/coverage@1%": 0.017578125, "calibration/coverage@10%": 0.021484375, "calibration/coverage@15%": 0.021484375, "calibration/coverage@20%": 0.1484375, "calibration/coverage@25%": 0.3115234375, "calibration/coverage@30%": 0.591796875, "calibration/coverage@5%": 0.021484375, "calibration/ece": 0.17626870645936057, "calibration/mean_confidence": 0.5701375435406395, "calibration/prompt_uniqueness": 0.7572021484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000244140625, "completions/max_length": 984.0, "completions/max_terminated_length": 444.5, "completions/mean_length": 226.9221420288086, "completions/mean_terminated_length": 226.60242462158203, "completions/min_length": 111.5, "completions/min_terminated_length": 111.5, "epoch": 0.9984, "num_tokens": 1069081719.0, "reward": 0.7988417744636536, "reward_std": 0.06897459551692009, "rewards/accuracy_reward": 0.54345703125, "rewards/brier_reward": 0.752604752779007, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0036660623736679554, "rewards/frontier_coverage_1": 0.06480471789836884, "rewards/frontier_coverage_10": 0.06480471789836884, "rewards/frontier_coverage_15": 0.0636993870139122, "rewards/frontier_coverage_20": 0.04550405964255333, "rewards/frontier_coverage_25": 0.03341560810804367, "rewards/frontier_coverage_5": 0.06480471789836884, "rewards/frontier_ece_reward": 0.002690421766601503, "rewards/frontier_entropy_batch_reward": -0.045069485902786255, "signal/accuracy_reward/centered_abs_mean": 0.0859375, "signal/accuracy_reward/group_bin_occupancy": 0.1669921875, "signal/accuracy_reward/group_std_mean": 0.11501816660165787, "signal/accuracy_reward/group_zero_std_frac": 0.6640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04296875, "signal/advantage_abs_mean": 0.05269451253116131, "signal/advantage_pre_scale_abs_mean": 0.05269451253116131, "signal/advantage_pre_scale_std": 0.09441451355814934, "signal/advantage_std": 0.09441451355814934, "signal/brier_reward/centered_abs_mean": 0.13213703781366348, "signal/brier_reward/group_bin_occupancy": 0.83984375, "signal/brier_reward/group_std_mean": 0.16977553814649582, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.004910205025225878, "signal/brier_reward/weight": 0.03715994581580162, "signal/brier_reward/weighted_centered_abs_mean": 0.004910205025225878, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036444071447476745, "signal/frontier_aurc_reward/group_bin_occupancy": 0.6748046875, "signal/frontier_aurc_reward/group_std_mean": 0.0065204238053411245, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.051491022342816e-05, "signal/frontier_aurc_reward/weight": 0.022092683240771294, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.051491022342816e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.168254055082798, "signal/frontier_coverage_1/group_bin_occupancy": 0.8759765625, "signal/frontier_coverage_1/group_std_mean": 0.21480195224285126, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033081687288358808, "signal/frontier_coverage_1/weight": 0.01966171059757471, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033081687288358808, "signal/frontier_coverage_10/centered_abs_mean": 0.168254055082798, "signal/frontier_coverage_10/group_bin_occupancy": 0.8759765625, "signal/frontier_coverage_10/group_std_mean": 0.21480195224285126, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033081687288358808, "signal/frontier_coverage_10/weight": 0.01966171059757471, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033081687288358808, "signal/frontier_coverage_15/centered_abs_mean": 0.16302235424518585, "signal/frontier_coverage_15/group_bin_occupancy": 0.875, "signal/frontier_coverage_15/group_std_mean": 0.20827266573905945, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032074202317744493, "signal/frontier_coverage_15/weight": 0.019674619659781456, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032074202317744493, "signal/frontier_coverage_20/centered_abs_mean": 0.0979037694633007, "signal/frontier_coverage_20/group_bin_occupancy": 0.875, "signal/frontier_coverage_20/group_std_mean": 0.1266292929649353, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019662610720843077, "signal/frontier_coverage_20/weight": 0.020083477720618248, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019662610720843077, "signal/frontier_coverage_25/centered_abs_mean": 0.05765395425260067, "signal/frontier_coverage_25/group_bin_occupancy": 0.9013671875, "signal/frontier_coverage_25/group_std_mean": 0.07497931271791458, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011882567778229713, "signal/frontier_coverage_25/weight": 0.020610064268112183, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011882567778229713, "signal/frontier_coverage_5/centered_abs_mean": 0.168254055082798, "signal/frontier_coverage_5/group_bin_occupancy": 0.8759765625, "signal/frontier_coverage_5/group_std_mean": 0.21480195224285126, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033081687288358808, "signal/frontier_coverage_5/weight": 0.01966171059757471, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033081687288358808, "signal/frontier_ece_reward/centered_abs_mean": 0.006677088560536504, "signal/frontier_ece_reward/group_bin_occupancy": 0.705078125, "signal/frontier_ece_reward/group_std_mean": 0.008518182206898928, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010214661015197635, "signal/frontier_ece_reward/weight": 0.15298082679510117, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010214661015197635, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07456976920366287, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.09598826617002487, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012581207789480686, "signal/frontier_entropy_batch_reward/weight": 0.1687132492661476, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012581207789480686, "step": 312, "total_flos": 0.0, "train_loss": 0.0035973651800491214, "train_runtime": 61836.3414, "train_samples_per_second": 0.323, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1069081719, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }