Files
RLCR-v4-ks-uniqueness-cov0-…/trainer_state.json
ModelHub XC 7d0aa220d7 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-cold-math
Source: Original Platform
2026-04-11 11:40:05 +08:00

7368 lines
475 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49919376007799904,
"eval_steps": 50,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.5102160159904601,
"calibration/batch_distribution_entropy": 0.2895044023882817,
"calibration/batch_entropy_100bins": 0.354793971262317,
"calibration/batch_entropy_10bins": 0.2895044023882817,
"calibration/batch_entropy_50bins": 0.4107426911912116,
"calibration/batch_uniqueness": 0.5093545043561868,
"calibration/confidence_entropy": 0.22127973516932448,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.46712570461757996,
"calibration/mean_confidence": 0.9131820617407878,
"calibration/prompt_uniqueness": 0.3733117777662954,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021788194444444464,
"completions/max_length": 4062.8,
"completions/max_terminated_length": 4062.8,
"completions/mean_length": 515.3322021484375,
"completions/mean_terminated_length": 526.8361450195313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.8,
"epoch": 0.011999850001874977,
"grad_norm": 0.00417915266007185,
"learning_rate": 5.952380952380953e-07,
"loss": 0.0033,
"num_tokens": 9050835.0,
"reward": 0.4895077347755432,
"reward_std": 0.4509033739566803,
"rewards/accuracy_reward": 0.26111110746860505,
"rewards/brier_reward": 0.3134358525276184,
"rewards/confidence_uniqueness_reward": 0.292756462097168,
"rewards/format_reward": 0.6008680462837219,
"rewards/frontier_aurc_reward": 0.2758327066898346,
"rewards/frontier_coverage_0": 0.2758327066898346,
"rewards/frontier_coverage_1": 0.2758327066898346,
"rewards/frontier_coverage_10": 0.2758327066898346,
"rewards/frontier_coverage_15": 0.2758327066898346,
"rewards/frontier_coverage_20": 0.2758327066898346,
"rewards/frontier_coverage_25": 0.2758327066898346,
"rewards/frontier_coverage_5": 0.2758327066898346,
"rewards/frontier_ece_reward": 0.2758327066898346,
"rewards/frontier_entropy_batch_reward": -0.5726763129234314,
"signal/accuracy_reward/centered_abs_mean": 0.30725911259651184,
"signal/accuracy_reward/group_bin_occupancy": 0.23958333333333334,
"signal/accuracy_reward/group_std_mean": 0.3683225452899933,
"signal/accuracy_reward/group_zero_std_frac": 0.08333333507180214,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15362955629825592,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15362955629825592,
"signal/advantage_abs_mean": 0.3866611957550049,
"signal/advantage_pre_scale_abs_mean": 0.3866611957550049,
"signal/advantage_pre_scale_std": 0.45703948736190797,
"signal/advantage_std": 0.45703948736190797,
"signal/brier_reward/centered_abs_mean": 0.3180504024028778,
"signal/brier_reward/group_bin_occupancy": 0.5190972222222222,
"signal/brier_reward/group_std_mean": 0.371950763463974,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03180503956973553,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03180503956973553,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23733226656913758,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6104166666666666,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2896383464336395,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023733228072524072,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023733228072524072,
"signal/format_reward/centered_abs_mean": 0.4396592855453491,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.47461998462677,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21982964277267455,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.21982964277267455,
"signal/frontier_aurc_reward/centered_abs_mean": 0.3085459768772125,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_aurc_reward/group_std_mean": 0.36748775839805603,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_0/centered_abs_mean": 0.3085459768772125,
"signal/frontier_coverage_0/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_coverage_0/group_std_mean": 0.36748775839805603,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_1/centered_abs_mean": 0.3085459768772125,
"signal/frontier_coverage_1/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_coverage_1/group_std_mean": 0.36748775839805603,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_10/centered_abs_mean": 0.3085459768772125,
"signal/frontier_coverage_10/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_coverage_10/group_std_mean": 0.36748775839805603,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_15/centered_abs_mean": 0.3085459768772125,
"signal/frontier_coverage_15/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_coverage_15/group_std_mean": 0.36748775839805603,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_20/centered_abs_mean": 0.3085459768772125,
"signal/frontier_coverage_20/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_coverage_20/group_std_mean": 0.36748775839805603,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_25/centered_abs_mean": 0.3085459768772125,
"signal/frontier_coverage_25/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_coverage_25/group_std_mean": 0.36748775839805603,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_5/centered_abs_mean": 0.3085459768772125,
"signal/frontier_coverage_5/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_coverage_5/group_std_mean": 0.36748775839805603,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003856824804097414,
"signal/frontier_ece_reward/centered_abs_mean": 0.3085459768772125,
"signal/frontier_ece_reward/group_bin_occupancy": 0.3993055555555555,
"signal/frontier_ece_reward/group_std_mean": 0.36748775839805603,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030854598432779313,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030854598432779313,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44998674988746645,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2989583333333333,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4826855003833771,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04499867707490921,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04499867707490921,
"step": 5
},
{
"calibration/aurc": 0.5014903573254282,
"calibration/batch_distribution_entropy": 0.2653753261224933,
"calibration/batch_entropy_100bins": 0.3489320427418314,
"calibration/batch_entropy_10bins": 0.2653753261224933,
"calibration/batch_entropy_50bins": 0.4013330765192264,
"calibration/batch_uniqueness": 0.507430794053826,
"calibration/confidence_entropy": 0.21812283334008473,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4576217089358855,
"calibration/mean_confidence": 0.9197730529173931,
"calibration/prompt_uniqueness": 0.38747456446429124,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018315972222222233,
"completions/max_length": 3999.2,
"completions/max_terminated_length": 3999.2,
"completions/mean_length": 479.84349365234374,
"completions/mean_terminated_length": 488.94312744140626,
"completions/min_length": 0.0,
"completions/min_terminated_length": 7.0,
"epoch": 0.023999700003749954,
"grad_norm": 0.0026754760183393955,
"learning_rate": 1.1904761904761906e-06,
"loss": -0.0003,
"num_tokens": 17661352.0,
"reward": 0.5657774209976196,
"reward_std": 0.43071451783180237,
"rewards/accuracy_reward": 0.29192708134651185,
"rewards/brier_reward": 0.35417120456695556,
"rewards/confidence_uniqueness_reward": 0.3518584191799164,
"rewards/format_reward": 0.7115451455116272,
"rewards/frontier_aurc_reward": 0.3074465751647949,
"rewards/frontier_coverage_0": 0.3074465751647949,
"rewards/frontier_coverage_1": 0.3074465751647949,
"rewards/frontier_coverage_10": 0.3074465751647949,
"rewards/frontier_coverage_15": 0.3074465751647949,
"rewards/frontier_coverage_20": 0.3074465751647949,
"rewards/frontier_coverage_25": 0.3074465751647949,
"rewards/frontier_coverage_5": 0.3074465751647949,
"rewards/frontier_ece_reward": 0.3074465751647949,
"rewards/frontier_entropy_batch_reward": -0.6805100202560425,
"signal/accuracy_reward/centered_abs_mean": 0.326611328125,
"signal/accuracy_reward/group_bin_occupancy": 0.2420138888888889,
"signal/accuracy_reward/group_std_mean": 0.38544551730155946,
"signal/accuracy_reward/group_zero_std_frac": 0.06388889066874981,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1633056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1633056640625,
"signal/advantage_abs_mean": 0.35993013381958006,
"signal/advantage_pre_scale_abs_mean": 0.35993013381958006,
"signal/advantage_pre_scale_std": 0.43645923137664794,
"signal/advantage_std": 0.43645923137664794,
"signal/brier_reward/centered_abs_mean": 0.3224785506725311,
"signal/brier_reward/group_bin_occupancy": 0.5399305555555556,
"signal/brier_reward/group_std_mean": 0.3748953461647034,
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03224785625934601,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03224785625934601,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2230827957391739,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6263888888888889,
"signal/confidence_uniqueness_reward/group_std_mean": 0.27950537800788877,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022308281064033507,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022308281064033507,
"signal/format_reward/centered_abs_mean": 0.3555935323238373,
"signal/format_reward/group_bin_occupancy": 0.2489583333333333,
"signal/format_reward/group_std_mean": 0.42049226760864256,
"signal/format_reward/group_zero_std_frac": 0.008333333395421505,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17779676616191864,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.17779676616191864,
"signal/frontier_aurc_reward/centered_abs_mean": 0.32133702635765077,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_aurc_reward/group_std_mean": 0.3776120483875275,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_0/centered_abs_mean": 0.32133702635765077,
"signal/frontier_coverage_0/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_coverage_0/group_std_mean": 0.3776120483875275,
"signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_1/centered_abs_mean": 0.32133702635765077,
"signal/frontier_coverage_1/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_coverage_1/group_std_mean": 0.3776120483875275,
"signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_10/centered_abs_mean": 0.32133702635765077,
"signal/frontier_coverage_10/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_coverage_10/group_std_mean": 0.3776120483875275,
"signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_15/centered_abs_mean": 0.32133702635765077,
"signal/frontier_coverage_15/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_coverage_15/group_std_mean": 0.3776120483875275,
"signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_20/centered_abs_mean": 0.32133702635765077,
"signal/frontier_coverage_20/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_coverage_20/group_std_mean": 0.3776120483875275,
"signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_25/centered_abs_mean": 0.32133702635765077,
"signal/frontier_coverage_25/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_coverage_25/group_std_mean": 0.3776120483875275,
"signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_5/centered_abs_mean": 0.32133702635765077,
"signal/frontier_coverage_5/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_coverage_5/group_std_mean": 0.3776120483875275,
"signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004016713052988052,
"signal/frontier_ece_reward/centered_abs_mean": 0.32133702635765077,
"signal/frontier_ece_reward/group_bin_occupancy": 0.41215277777777787,
"signal/frontier_ece_reward/group_std_mean": 0.3776120483875275,
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03213370442390442,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03213370442390442,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.380006468296051,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3114583333333333,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44059685468673704,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.038000645488500594,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.038000645488500594,
"step": 10
},
{
"calibration/aurc": 0.5613535708127804,
"calibration/batch_distribution_entropy": 0.30285369677867097,
"calibration/batch_entropy_100bins": 0.366809675757525,
"calibration/batch_entropy_10bins": 0.30285369677867097,
"calibration/batch_entropy_50bins": 0.42733167303489505,
"calibration/batch_uniqueness": 0.5391804649021238,
"calibration/confidence_entropy": 0.24734191433149855,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5070262821896179,
"calibration/mean_confidence": 0.9101403023276001,
"calibration/prompt_uniqueness": 0.44865000281603207,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011284722222222255,
"completions/max_length": 3958.4,
"completions/max_terminated_length": 3958.4,
"completions/mean_length": 423.924658203125,
"completions/mean_terminated_length": 428.80230712890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 37.8,
"epoch": 0.03599955000562493,
"grad_norm": 0.0023876321502029896,
"learning_rate": 1.7857142857142859e-06,
"loss": -0.0106,
"num_tokens": 25646948.0,
"reward": 0.6934881210327148,
"reward_std": 0.3449582040309906,
"rewards/accuracy_reward": 0.31979166269302367,
"rewards/brier_reward": 0.42017869353294374,
"rewards/confidence_uniqueness_reward": 0.49401273131370543,
"rewards/format_reward": 0.9217881917953491,
"rewards/frontier_aurc_reward": 0.34637343883514404,
"rewards/frontier_coverage_0": 0.34637343883514404,
"rewards/frontier_coverage_1": 0.34637343883514404,
"rewards/frontier_coverage_10": 0.34637343883514404,
"rewards/frontier_coverage_15": 0.34637343883514404,
"rewards/frontier_coverage_20": 0.34637343883514404,
"rewards/frontier_coverage_25": 0.34637343883514404,
"rewards/frontier_coverage_5": 0.34637343883514404,
"rewards/frontier_ece_reward": 0.34637343883514404,
"rewards/frontier_entropy_batch_reward": -0.8799565434455872,
"signal/accuracy_reward/centered_abs_mean": 0.31633029580116273,
"signal/accuracy_reward/group_bin_occupancy": 0.23958333333333331,
"signal/accuracy_reward/group_std_mean": 0.37618979811668396,
"signal/accuracy_reward/group_zero_std_frac": 0.0833333358168602,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15816514790058137,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15816514790058137,
"signal/advantage_abs_mean": 0.2800732344388962,
"signal/advantage_pre_scale_abs_mean": 0.2800732344388962,
"signal/advantage_pre_scale_std": 0.3535742461681366,
"signal/advantage_std": 0.3535742461681366,
"signal/brier_reward/centered_abs_mean": 0.29867386221885683,
"signal/brier_reward/group_bin_occupancy": 0.5961805555555555,
"signal/brier_reward/group_std_mean": 0.35164528489112856,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02986738607287407,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02986738607287407,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.17988529205322265,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6309027777777778,
"signal/confidence_uniqueness_reward/group_std_mean": 0.23147567808628083,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017988529428839682,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017988529428839682,
"signal/format_reward/centered_abs_mean": 0.1297905795276165,
"signal/format_reward/group_bin_occupancy": 0.21909722222222222,
"signal/format_reward/group_std_mean": 0.21357380449771882,
"signal/format_reward/group_zero_std_frac": 0.24722222574055194,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06489528976380825,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.06489528976380825,
"signal/frontier_aurc_reward/centered_abs_mean": 0.30928654670715333,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_aurc_reward/group_std_mean": 0.366342568397522,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_0/centered_abs_mean": 0.30928654670715333,
"signal/frontier_coverage_0/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_coverage_0/group_std_mean": 0.366342568397522,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_1/centered_abs_mean": 0.30928654670715333,
"signal/frontier_coverage_1/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_coverage_1/group_std_mean": 0.366342568397522,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_10/centered_abs_mean": 0.30928654670715333,
"signal/frontier_coverage_10/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_coverage_10/group_std_mean": 0.366342568397522,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_15/centered_abs_mean": 0.30928654670715333,
"signal/frontier_coverage_15/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_coverage_15/group_std_mean": 0.366342568397522,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_20/centered_abs_mean": 0.30928654670715333,
"signal/frontier_coverage_20/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_coverage_20/group_std_mean": 0.366342568397522,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_25/centered_abs_mean": 0.30928654670715333,
"signal/frontier_coverage_25/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_coverage_25/group_std_mean": 0.366342568397522,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_5/centered_abs_mean": 0.30928654670715333,
"signal/frontier_coverage_5/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_coverage_5/group_std_mean": 0.366342568397522,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038660819176584483,
"signal/frontier_ece_reward/centered_abs_mean": 0.30928654670715333,
"signal/frontier_ece_reward/group_bin_occupancy": 0.46701388888888895,
"signal/frontier_ece_reward/group_std_mean": 0.366342568397522,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030928655341267586,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030928655341267586,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1947160005569458,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.31145833333333334,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3006825089454651,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07222222201526166,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019471600651741028,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019471600651741028,
"step": 15
},
{
"calibration/aurc": 0.44435261205240256,
"calibration/batch_distribution_entropy": 0.3935012421540486,
"calibration/batch_entropy_100bins": 0.3987097622781842,
"calibration/batch_entropy_10bins": 0.3935012421540486,
"calibration/batch_entropy_50bins": 0.46414418845684446,
"calibration/batch_uniqueness": 0.5991141979420094,
"calibration/buffer_distribution_entropy": 0.3111829772402379,
"calibration/buffer_entropy_100bins": 0.37510196723994477,
"calibration/buffer_entropy_10bins": 0.3111829772402379,
"calibration/buffer_entropy_50bins": 0.4347081916262859,
"calibration/confidence_entropy": 0.3047036317626108,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.10091759638224643,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.38096295336946745,
"calibration/mean_confidence": 0.8873765282761564,
"calibration/prompt_uniqueness": 0.5068829583550546,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01050347222222221,
"completions/max_length": 3976.8,
"completions/max_terminated_length": 3976.8,
"completions/mean_length": 469.0471435546875,
"completions/mean_terminated_length": 474.0674682617188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 92.8,
"epoch": 0.04799940000749991,
"grad_norm": 0.001804217929020524,
"learning_rate": 2.380952380952381e-06,
"loss": -0.0096,
"num_tokens": 34164067.0,
"reward": 0.7753249049186707,
"reward_std": 0.2653871446847916,
"rewards/accuracy_reward": 0.44453125,
"rewards/brier_reward": 0.5578694939613342,
"rewards/confidence_uniqueness_reward": 0.5910940289497375,
"rewards/format_reward": 0.9848958373069763,
"rewards/frontier_aurc_reward": 0.1972955190576613,
"rewards/frontier_coverage_0": 0.20763492183759807,
"rewards/frontier_coverage_1": 0.20763492183759807,
"rewards/frontier_coverage_10": 0.20763492183759807,
"rewards/frontier_coverage_15": 0.20763492183759807,
"rewards/frontier_coverage_20": 0.20763492183759807,
"rewards/frontier_coverage_25": 0.20763492183759807,
"rewards/frontier_coverage_5": 0.20763492183759807,
"rewards/frontier_ece_reward": 0.19086738899350167,
"rewards/frontier_entropy_batch_reward": -0.9400596499443055,
"signal/accuracy_reward/centered_abs_mean": 0.29937608242034913,
"signal/accuracy_reward/group_bin_occupancy": 0.2409722222222222,
"signal/accuracy_reward/group_std_mean": 0.36640662550926206,
"signal/accuracy_reward/group_zero_std_frac": 0.07222222425043583,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14968804121017457,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14968804121017457,
"signal/advantage_abs_mean": 0.21280945539474488,
"signal/advantage_pre_scale_abs_mean": 0.21280945539474488,
"signal/advantage_pre_scale_std": 0.274143123626709,
"signal/advantage_std": 0.274143123626709,
"signal/brier_reward/centered_abs_mean": 0.2618247151374817,
"signal/brier_reward/group_bin_occupancy": 0.6493055555555556,
"signal/brier_reward/group_std_mean": 0.3188887655735016,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026182472333312036,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.026182472333312036,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1749788463115692,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6107638888888889,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2077195405960083,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01749788485467434,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01749788485467434,
"signal/format_reward/centered_abs_mean": 0.02798394113779068,
"signal/format_reward/group_bin_occupancy": 0.16215277777777776,
"signal/format_reward/group_std_mean": 0.06326771751046181,
"signal/format_reward/group_zero_std_frac": 0.7027777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01399197056889534,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01399197056889534,
"signal/frontier_aurc_reward/centered_abs_mean": 0.12959627383388578,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6298611111111111,
"signal/frontier_aurc_reward/group_std_mean": 0.15695078764110804,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0016199534831685015,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0016199534831685015,
"signal/frontier_coverage_0/centered_abs_mean": 0.1446688774973154,
"signal/frontier_coverage_0/group_bin_occupancy": 0.61875,
"signal/frontier_coverage_0/group_std_mean": 0.18371742591261864,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_1/centered_abs_mean": 0.1446688774973154,
"signal/frontier_coverage_1/group_bin_occupancy": 0.61875,
"signal/frontier_coverage_1/group_std_mean": 0.18371742591261864,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_10/centered_abs_mean": 0.1446688774973154,
"signal/frontier_coverage_10/group_bin_occupancy": 0.61875,
"signal/frontier_coverage_10/group_std_mean": 0.18371742591261864,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_15/centered_abs_mean": 0.1446688774973154,
"signal/frontier_coverage_15/group_bin_occupancy": 0.61875,
"signal/frontier_coverage_15/group_std_mean": 0.18371742591261864,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_20/centered_abs_mean": 0.1446688774973154,
"signal/frontier_coverage_20/group_bin_occupancy": 0.61875,
"signal/frontier_coverage_20/group_std_mean": 0.18371742591261864,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_25/centered_abs_mean": 0.1446688774973154,
"signal/frontier_coverage_25/group_bin_occupancy": 0.61875,
"signal/frontier_coverage_25/group_std_mean": 0.18371742591261864,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_5/centered_abs_mean": 0.1446688774973154,
"signal/frontier_coverage_5/group_bin_occupancy": 0.61875,
"signal/frontier_coverage_5/group_std_mean": 0.18371742591261864,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001808361045550555,
"signal/frontier_ece_reward/centered_abs_mean": 0.21210699677467346,
"signal/frontier_ece_reward/group_bin_occupancy": 0.5527777777777778,
"signal/frontier_ece_reward/group_std_mean": 0.2537235528230667,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.021210700459778308,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.021210700459778308,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10517127364873886,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2833333333333333,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2029697299003601,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.24166667461395264,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010517127625644208,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010517127625644208,
"step": 20
},
{
"calibration/aurc": 0.3473902280376032,
"calibration/batch_distribution_entropy": 0.5674422584825392,
"calibration/batch_entropy_100bins": 0.45404012302736235,
"calibration/batch_entropy_10bins": 0.5674422584825392,
"calibration/batch_entropy_50bins": 0.5285762138333968,
"calibration/batch_uniqueness": 0.6932638785134733,
"calibration/buffer_distribution_entropy": 0.35995197977266097,
"calibration/buffer_entropy_100bins": 0.39751875483045446,
"calibration/buffer_entropy_10bins": 0.35995197977266097,
"calibration/buffer_entropy_50bins": 0.46073419347011135,
"calibration/confidence_entropy": 0.36861193170010964,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.06354166666666666,
"calibration/coverage@20%": 0.17879150175901498,
"calibration/coverage@25%": 0.3004287598944591,
"calibration/coverage@30%": 0.4125326370757181,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.2454035888451538,
"calibration/mean_confidence": 0.8470411999313076,
"calibration/prompt_uniqueness": 0.5898410725874782,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01206597222222221,
"completions/max_length": 3880.8,
"completions/max_terminated_length": 3880.8,
"completions/mean_length": 531.1012268066406,
"completions/mean_terminated_length": 537.5916625976563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 116.0,
"epoch": 0.05999925000937488,
"grad_norm": 0.0007845126674510539,
"learning_rate": 2.9761904761904763e-06,
"loss": -0.0073,
"num_tokens": 43406801.0,
"reward": 0.8140245795249939,
"reward_std": 0.20924938917160035,
"rewards/accuracy_reward": 0.5595486044883728,
"rewards/brier_reward": 0.6664814233779908,
"rewards/confidence_uniqueness_reward": 0.6804561376571655,
"rewards/format_reward": 0.9850694537162781,
"rewards/frontier_aurc_reward": -0.004134485684335232,
"rewards/frontier_coverage_0": 0.001367491763085127,
"rewards/frontier_coverage_1": 0.001367491763085127,
"rewards/frontier_coverage_10": 0.001367491763085127,
"rewards/frontier_coverage_15": 0.001367491763085127,
"rewards/frontier_coverage_20": 0.001367491763085127,
"rewards/frontier_coverage_25": 0.001367491763085127,
"rewards/frontier_coverage_5": 0.001367491763085127,
"rewards/frontier_ece_reward": 0.012747335172025486,
"rewards/frontier_entropy_batch_reward": -0.9432091236114502,
"signal/accuracy_reward/centered_abs_mean": 0.26035155951976774,
"signal/accuracy_reward/group_bin_occupancy": 0.23125,
"signal/accuracy_reward/group_std_mean": 0.3247913300991058,
"signal/accuracy_reward/group_zero_std_frac": 0.1500000014901161,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13017577975988387,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13017577975988387,
"signal/advantage_abs_mean": 0.16362378895282745,
"signal/advantage_pre_scale_abs_mean": 0.16362378895282745,
"signal/advantage_pre_scale_std": 0.22541998326778412,
"signal/advantage_std": 0.22541998326778412,
"signal/brier_reward/centered_abs_mean": 0.20725049078464508,
"signal/brier_reward/group_bin_occupancy": 0.7125,
"signal/brier_reward/group_std_mean": 0.25996835231781007,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020725049078464508,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020725049078464508,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.13917125761508942,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6472222222222224,
"signal/confidence_uniqueness_reward/group_std_mean": 0.16648139357566832,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013917125947773456,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013917125947773456,
"signal/format_reward/centered_abs_mean": 0.02619357593357563,
"signal/format_reward/group_bin_occupancy": 0.15347222222222223,
"signal/format_reward/group_std_mean": 0.0523154728114605,
"signal/format_reward/group_zero_std_frac": 0.7722222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013096787966787815,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013096787966787815,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003040881175547838,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.684375,
"signal/frontier_aurc_reward/group_std_mean": 0.004443522915244103,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.801101702265442e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.801101702265442e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.03864669501781463,
"signal/frontier_coverage_0/group_bin_occupancy": 0.7611111111111112,
"signal/frontier_coverage_0/group_std_mean": 0.05980467274785042,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_1/centered_abs_mean": 0.03864669501781463,
"signal/frontier_coverage_1/group_bin_occupancy": 0.7611111111111112,
"signal/frontier_coverage_1/group_std_mean": 0.05980467274785042,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_10/centered_abs_mean": 0.03864669501781463,
"signal/frontier_coverage_10/group_bin_occupancy": 0.7611111111111112,
"signal/frontier_coverage_10/group_std_mean": 0.05980467274785042,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_15/centered_abs_mean": 0.03864669501781463,
"signal/frontier_coverage_15/group_bin_occupancy": 0.7611111111111112,
"signal/frontier_coverage_15/group_std_mean": 0.05980467274785042,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_20/centered_abs_mean": 0.03864669501781463,
"signal/frontier_coverage_20/group_bin_occupancy": 0.7611111111111112,
"signal/frontier_coverage_20/group_std_mean": 0.05980467274785042,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_25/centered_abs_mean": 0.03864669501781463,
"signal/frontier_coverage_25/group_bin_occupancy": 0.7611111111111112,
"signal/frontier_coverage_25/group_std_mean": 0.05980467274785042,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_5/centered_abs_mean": 0.03864669501781463,
"signal/frontier_coverage_5/group_bin_occupancy": 0.7611111111111112,
"signal/frontier_coverage_5/group_std_mean": 0.05980467274785042,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0004830837191548198,
"signal/frontier_ece_reward/centered_abs_mean": 0.11394972950220109,
"signal/frontier_ece_reward/group_bin_occupancy": 0.685763888888889,
"signal/frontier_ece_reward/group_std_mean": 0.13674613535404206,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011394973285496235,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011394973285496235,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09805433601140975,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25659722222222225,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1901380091905594,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.31666666865348814,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009805433824658394,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009805433824658394,
"step": 25
},
{
"calibration/aurc": 0.3127096016826255,
"calibration/batch_distribution_entropy": 0.682729244006534,
"calibration/batch_entropy_100bins": 0.4653335228882831,
"calibration/batch_entropy_10bins": 0.682729244006534,
"calibration/batch_entropy_50bins": 0.5435833807311128,
"calibration/batch_uniqueness": 0.7277807574566821,
"calibration/buffer_distribution_entropy": 0.44270389836197455,
"calibration/buffer_entropy_100bins": 0.43136583308440946,
"calibration/buffer_entropy_10bins": 0.44270389836197455,
"calibration/buffer_entropy_50bins": 0.5002752341976129,
"calibration/confidence_entropy": 0.47803069493853273,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.06774193548387096,
"calibration/coverage@25%": 0.16167049483339363,
"calibration/coverage@30%": 0.5524753519206546,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1434195157005612,
"calibration/mean_confidence": 0.7807785223650668,
"calibration/prompt_uniqueness": 0.6348340446173696,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015451388888888907,
"completions/max_length": 3997.2,
"completions/max_terminated_length": 3997.2,
"completions/mean_length": 605.9027099609375,
"completions/mean_terminated_length": 615.4102783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.8,
"epoch": 0.07199910001124986,
"grad_norm": 0.000519786321092397,
"learning_rate": 3.5714285714285718e-06,
"loss": -0.008,
"num_tokens": 53496720.0,
"reward": 0.8398738265037536,
"reward_std": 0.1871805250644684,
"rewards/accuracy_reward": 0.598524296283722,
"rewards/brier_reward": 0.7149089694023132,
"rewards/confidence_uniqueness_reward": 0.7139938831329345,
"rewards/format_reward": 0.9826388955116272,
"rewards/frontier_aurc_reward": -0.003387345978990197,
"rewards/frontier_coverage_0": -0.007904923893511296,
"rewards/frontier_coverage_1": -0.007904923893511296,
"rewards/frontier_coverage_10": -0.007904923893511296,
"rewards/frontier_coverage_15": -0.007904923893511296,
"rewards/frontier_coverage_20": -0.007904923893511296,
"rewards/frontier_coverage_25": -0.007904923893511296,
"rewards/frontier_coverage_5": -0.007904923893511296,
"rewards/frontier_ece_reward": 0.0140090461820364,
"rewards/frontier_entropy_batch_reward": -0.9426495194435119,
"signal/accuracy_reward/centered_abs_mean": 0.23240560591220855,
"signal/accuracy_reward/group_bin_occupancy": 0.22430555555555554,
"signal/accuracy_reward/group_std_mean": 0.29407615661621095,
"signal/accuracy_reward/group_zero_std_frac": 0.20555555820465088,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11620280295610427,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11620280295610427,
"signal/advantage_abs_mean": 0.1434020832180977,
"signal/advantage_pre_scale_abs_mean": 0.1434020832180977,
"signal/advantage_pre_scale_std": 0.2102207988500595,
"signal/advantage_std": 0.2102207988500595,
"signal/brier_reward/centered_abs_mean": 0.16666824817657472,
"signal/brier_reward/group_bin_occupancy": 0.7510416666666667,
"signal/brier_reward/group_std_mean": 0.2106872409582138,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016666825301945208,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016666825301945208,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10946927219629288,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6517361111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13697669506072999,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010946927219629287,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010946927219629287,
"signal/format_reward/centered_abs_mean": 0.02850477434694767,
"signal/format_reward/group_bin_occupancy": 0.15138888888888888,
"signal/format_reward/group_std_mean": 0.05226071253418922,
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014252387173473834,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014252387173473834,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018850252265110612,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7041666666666667,
"signal/frontier_aurc_reward/group_std_mean": 0.002827100735157728,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.356281656830106e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.356281656830106e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.05347522720694542,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8100694444444445,
"signal/frontier_coverage_0/group_std_mean": 0.07446252554655075,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_1/centered_abs_mean": 0.05347522720694542,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8100694444444445,
"signal/frontier_coverage_1/group_std_mean": 0.07446252554655075,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_10/centered_abs_mean": 0.05347522720694542,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8100694444444445,
"signal/frontier_coverage_10/group_std_mean": 0.07446252554655075,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_15/centered_abs_mean": 0.05347522720694542,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8100694444444445,
"signal/frontier_coverage_15/group_std_mean": 0.07446252554655075,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_20/centered_abs_mean": 0.05347522720694542,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8100694444444445,
"signal/frontier_coverage_20/group_std_mean": 0.07446252554655075,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_25/centered_abs_mean": 0.05347522720694542,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8100694444444445,
"signal/frontier_coverage_25/group_std_mean": 0.07446252554655075,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_5/centered_abs_mean": 0.05347522720694542,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8100694444444445,
"signal/frontier_coverage_5/group_std_mean": 0.07446252554655075,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00066844035172835,
"signal/frontier_ece_reward/centered_abs_mean": 0.06974484175443649,
"signal/frontier_ece_reward/group_bin_occupancy": 0.726388888888889,
"signal/frontier_ece_reward/group_std_mean": 0.08952345997095108,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006974484585225582,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006974484585225582,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09957558512687684,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25381944444444443,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.19237028956413268,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.31944444179534914,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00995755884796381,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00995755884796381,
"step": 30
},
{
"calibration/aurc": 0.2571704237558802,
"calibration/batch_distribution_entropy": 0.7051875161366095,
"calibration/batch_entropy_100bins": 0.48735346857362777,
"calibration/batch_entropy_10bins": 0.7051875161366095,
"calibration/batch_entropy_50bins": 0.5711749632836897,
"calibration/batch_uniqueness": 0.7382661567514962,
"calibration/buffer_distribution_entropy": 0.5312403517873816,
"calibration/buffer_entropy_100bins": 0.46550454069451846,
"calibration/buffer_entropy_10bins": 0.5312403517873816,
"calibration/buffer_entropy_50bins": 0.5407207375942191,
"calibration/confidence_entropy": 0.5339174803962459,
"calibration/coverage@0%": 0.009015236807393835,
"calibration/coverage@1%": 0.009015236807393835,
"calibration/coverage@10%": 0.06192117185778577,
"calibration/coverage@15%": 0.12324599591726784,
"calibration/coverage@20%": 0.20330160686285859,
"calibration/coverage@25%": 0.5564677729596665,
"calibration/coverage@30%": 0.8,
"calibration/coverage@5%": 0.027962605228446468,
"calibration/ece": 0.11199284756638057,
"calibration/mean_confidence": 0.7308071025680081,
"calibration/prompt_uniqueness": 0.6523236468285705,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021614583333333326,
"completions/max_length": 3965.0,
"completions/max_terminated_length": 3965.0,
"completions/mean_length": 644.9698852539062,
"completions/mean_terminated_length": 659.2716430664062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 206.2,
"epoch": 0.08399895001312484,
"grad_norm": 0.0004933425807394087,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0138,
"num_tokens": 64004213.0,
"reward": 0.8588806509971618,
"reward_std": 0.17558546364307404,
"rewards/accuracy_reward": 0.6338541626930236,
"rewards/brier_reward": 0.7461713314056396,
"rewards/confidence_uniqueness_reward": 0.7108201503753662,
"rewards/format_reward": 0.9765625,
"rewards/frontier_aurc_reward": -0.0027221166528761387,
"rewards/frontier_coverage_0": -0.01736396786291152,
"rewards/frontier_coverage_1": -0.01736396786291152,
"rewards/frontier_coverage_10": -0.01736396786291152,
"rewards/frontier_coverage_15": -0.01736396786291152,
"rewards/frontier_coverage_20": -0.01736396786291152,
"rewards/frontier_coverage_25": -0.01736396786291152,
"rewards/frontier_coverage_5": -0.01736396786291152,
"rewards/frontier_ece_reward": 0.013199667818844319,
"rewards/frontier_entropy_batch_reward": -0.9179341077804566,
"signal/accuracy_reward/centered_abs_mean": 0.20397135317325593,
"signal/accuracy_reward/group_bin_occupancy": 0.21909722222222222,
"signal/accuracy_reward/group_std_mean": 0.26651409566402434,
"signal/accuracy_reward/group_zero_std_frac": 0.24722222089767457,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10198567658662797,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10198567658662797,
"signal/advantage_abs_mean": 0.1289672315120697,
"signal/advantage_pre_scale_abs_mean": 0.1289672315120697,
"signal/advantage_pre_scale_std": 0.19859465956687927,
"signal/advantage_std": 0.19859465956687927,
"signal/brier_reward/centered_abs_mean": 0.1401739925146103,
"signal/brier_reward/group_bin_occupancy": 0.7861111111111111,
"signal/brier_reward/group_std_mean": 0.18215077519416809,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014017399214208127,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014017399214208127,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11878160536289215,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6989583333333333,
"signal/confidence_uniqueness_reward/group_std_mean": 0.14878978729248046,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011878160759806633,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011878160759806633,
"signal/format_reward/centered_abs_mean": 0.03610026016831398,
"signal/format_reward/group_bin_occupancy": 0.15694444444444444,
"signal/format_reward/group_std_mean": 0.06481491774320602,
"signal/format_reward/group_zero_std_frac": 0.7444444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01805013008415699,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01805013008415699,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001359763811342418,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7402777777777778,
"signal/frontier_aurc_reward/group_std_mean": 0.0021033880300819876,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6997047714539805e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6997047714539805e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.06835834383964538,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8423611111111111,
"signal/frontier_coverage_0/group_std_mean": 0.09002460837364197,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_1/centered_abs_mean": 0.06835834383964538,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8423611111111111,
"signal/frontier_coverage_1/group_std_mean": 0.09002460837364197,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_10/centered_abs_mean": 0.06835834383964538,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8423611111111111,
"signal/frontier_coverage_10/group_std_mean": 0.09002460837364197,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_15/centered_abs_mean": 0.06835834383964538,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8423611111111111,
"signal/frontier_coverage_15/group_std_mean": 0.09002460837364197,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_20/centered_abs_mean": 0.06835834383964538,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8423611111111111,
"signal/frontier_coverage_20/group_std_mean": 0.09002460837364197,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_25/centered_abs_mean": 0.06835834383964538,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8423611111111111,
"signal/frontier_coverage_25/group_std_mean": 0.09002460837364197,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_5/centered_abs_mean": 0.06835834383964538,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8423611111111111,
"signal/frontier_coverage_5/group_std_mean": 0.09002460837364197,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008544792886823416,
"signal/frontier_ece_reward/centered_abs_mean": 0.0456878550350666,
"signal/frontier_ece_reward/group_bin_occupancy": 0.726736111111111,
"signal/frontier_ece_reward/group_std_mean": 0.06319972574710846,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004568785382434726,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004568785382434726,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.13658613115549087,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3038194444444444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24717094898223876,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.17777777910232545,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.013658612966537476,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013658612966537476,
"step": 35
},
{
"calibration/aurc": 0.3090385450407646,
"calibration/batch_distribution_entropy": 0.7382054017370615,
"calibration/batch_entropy_100bins": 0.6274540890939544,
"calibration/batch_entropy_10bins": 0.7382054017370615,
"calibration/batch_entropy_50bins": 0.6888115588991056,
"calibration/batch_uniqueness": 0.8256191615498129,
"calibration/buffer_distribution_entropy": 0.5895912459678766,
"calibration/buffer_entropy_100bins": 0.4977340263064625,
"calibration/buffer_entropy_10bins": 0.5895912459678766,
"calibration/buffer_entropy_50bins": 0.5756374115825056,
"calibration/confidence_entropy": 0.5221476459958998,
"calibration/coverage@0%": 0.002116402116402116,
"calibration/coverage@1%": 0.002116402116402116,
"calibration/coverage@10%": 0.06582397391796871,
"calibration/coverage@15%": 0.07211101440866455,
"calibration/coverage@20%": 0.11545305096218934,
"calibration/coverage@25%": 0.2742136018898421,
"calibration/coverage@30%": 0.3149446723859256,
"calibration/coverage@5%": 0.014649039192120131,
"calibration/ece": 0.11780196610367691,
"calibration/mean_confidence": 0.7270655645788431,
"calibration/prompt_uniqueness": 0.761317842320403,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017881944444444443,
"completions/max_length": 3955.8,
"completions/max_terminated_length": 3955.8,
"completions/mean_length": 685.938818359375,
"completions/mean_terminated_length": 698.407666015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 226.6,
"epoch": 0.09599880001499982,
"grad_norm": 0.00048642107867635787,
"learning_rate": 4.761904761904762e-06,
"loss": -0.0134,
"num_tokens": 75025748.0,
"reward": 0.8857535004615784,
"reward_std": 0.178457173705101,
"rewards/accuracy_reward": 0.6368923425674439,
"rewards/brier_reward": 0.7461103081703186,
"rewards/confidence_uniqueness_reward": 0.8087468624114991,
"rewards/format_reward": 0.9810763955116272,
"rewards/frontier_aurc_reward": -0.0026570867747068407,
"rewards/frontier_coverage_0": -0.019249437330290674,
"rewards/frontier_coverage_1": -0.019249437330290674,
"rewards/frontier_coverage_10": -0.019249437330290674,
"rewards/frontier_coverage_15": -0.019249437330290674,
"rewards/frontier_coverage_20": -0.019249437330290674,
"rewards/frontier_coverage_25": -0.019249437330290674,
"rewards/frontier_coverage_5": -0.019249437330290674,
"rewards/frontier_ece_reward": 0.008992346841841936,
"rewards/frontier_entropy_batch_reward": -0.7789829492568969,
"signal/accuracy_reward/centered_abs_mean": 0.20183919072151185,
"signal/accuracy_reward/group_bin_occupancy": 0.21909722222222222,
"signal/accuracy_reward/group_std_mean": 0.26670118868350984,
"signal/accuracy_reward/group_zero_std_frac": 0.2472222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10091959536075593,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10091959536075593,
"signal/advantage_abs_mean": 0.13196419775485993,
"signal/advantage_pre_scale_abs_mean": 0.13196419775485993,
"signal/advantage_pre_scale_std": 0.20010344088077545,
"signal/advantage_std": 0.20010344088077545,
"signal/brier_reward/centered_abs_mean": 0.14929873943328859,
"signal/brier_reward/group_bin_occupancy": 0.8149305555555555,
"signal/brier_reward/group_std_mean": 0.19311635494232177,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014929874055087566,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014929874055087566,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.10491774380207061,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6815972222222222,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13206958025693893,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010491774696856738,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010491774696856738,
"signal/format_reward/centered_abs_mean": 0.03161892369389534,
"signal/format_reward/group_bin_occupancy": 0.15243055555555557,
"signal/format_reward/group_std_mean": 0.0556372843682766,
"signal/format_reward/group_zero_std_frac": 0.7805555582046508,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01580946184694767,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01580946184694767,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018603557720780373,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7145833333333333,
"signal/frontier_aurc_reward/group_std_mean": 0.002942401263862848,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.325444802409038e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.325444802409038e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.08571741878986358,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8364583333333334,
"signal/frontier_coverage_0/group_std_mean": 0.11739055812358856,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_1/centered_abs_mean": 0.08571741878986358,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8364583333333334,
"signal/frontier_coverage_1/group_std_mean": 0.11739055812358856,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_10/centered_abs_mean": 0.08571741878986358,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8364583333333334,
"signal/frontier_coverage_10/group_std_mean": 0.11739055812358856,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_15/centered_abs_mean": 0.08571741878986358,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8364583333333334,
"signal/frontier_coverage_15/group_std_mean": 0.11739055812358856,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_20/centered_abs_mean": 0.08571741878986358,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8364583333333334,
"signal/frontier_coverage_20/group_std_mean": 0.11739055812358856,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_25/centered_abs_mean": 0.08571741878986358,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8364583333333334,
"signal/frontier_coverage_25/group_std_mean": 0.11739055812358856,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_5/centered_abs_mean": 0.08571741878986358,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8364583333333334,
"signal/frontier_coverage_5/group_std_mean": 0.11739055812358856,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010714677278883754,
"signal/frontier_ece_reward/centered_abs_mean": 0.0511918880045414,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7284722222222222,
"signal/frontier_ece_reward/group_std_mean": 0.08025645166635513,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005119189154356718,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005119189154356718,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3113617360591888,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.5229166666666666,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4173878490924835,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01388888917863369,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031136173382401468,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031136173382401468,
"step": 40
},
{
"calibration/aurc": 0.22827400940686599,
"calibration/batch_distribution_entropy": 0.9071498888134337,
"calibration/batch_entropy_100bins": 0.8968223970662214,
"calibration/batch_entropy_10bins": 0.9071498888134337,
"calibration/batch_entropy_50bins": 0.9171079157181918,
"calibration/batch_uniqueness": 0.9456390260906643,
"calibration/buffer_distribution_entropy": 0.6393650601547213,
"calibration/buffer_entropy_100bins": 0.5675126064584276,
"calibration/buffer_entropy_10bins": 0.6393650601547213,
"calibration/buffer_entropy_50bins": 0.6379876044842737,
"calibration/confidence_entropy": 0.5032381198722609,
"calibration/coverage@0%": 0.014736842105263156,
"calibration/coverage@1%": 0.014736842105263156,
"calibration/coverage@10%": 0.055916127216436816,
"calibration/coverage@15%": 0.10939021240009408,
"calibration/coverage@20%": 0.30897909044000377,
"calibration/coverage@25%": 0.6747945574902163,
"calibration/coverage@30%": 0.9791666666666666,
"calibration/coverage@5%": 0.034210526315789476,
"calibration/ece": 0.16323481632764078,
"calibration/mean_confidence": 0.6319245579648017,
"calibration/prompt_uniqueness": 0.879434752960265,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02144097222222221,
"completions/max_length": 3757.8,
"completions/max_terminated_length": 3757.8,
"completions/mean_length": 724.39765625,
"completions/mean_terminated_length": 740.4448852539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 246.0,
"epoch": 0.1079986500168748,
"grad_norm": 0.0004063255328219384,
"learning_rate": 4.909638554216868e-06,
"loss": -0.0167,
"num_tokens": 86506073.0,
"reward": 0.9310973882675171,
"reward_std": 0.17580304443836212,
"rewards/accuracy_reward": 0.6424479246139526,
"rewards/brier_reward": 0.7319015741348267,
"rewards/confidence_uniqueness_reward": 0.9265702605247498,
"rewards/format_reward": 0.9777777791023254,
"rewards/frontier_aurc_reward": -0.002285001240670681,
"rewards/frontier_coverage_0": -0.033109604939818384,
"rewards/frontier_coverage_1": -0.033109604939818384,
"rewards/frontier_coverage_10": -0.033109604939818384,
"rewards/frontier_coverage_15": -0.033109604939818384,
"rewards/frontier_coverage_20": -0.033109604939818384,
"rewards/frontier_coverage_25": -0.033109604939818384,
"rewards/frontier_coverage_5": -0.033109604939818384,
"rewards/frontier_ece_reward": -0.006580299325287342,
"rewards/frontier_entropy_batch_reward": -0.41278970837593076,
"signal/accuracy_reward/centered_abs_mean": 0.19691297709941863,
"signal/accuracy_reward/group_bin_occupancy": 0.21909722222222222,
"signal/accuracy_reward/group_std_mean": 0.2627987444400787,
"signal/accuracy_reward/group_zero_std_frac": 0.2472222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09845648854970931,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09845648854970931,
"signal/advantage_abs_mean": 0.13047890067100526,
"signal/advantage_pre_scale_abs_mean": 0.13047890067100526,
"signal/advantage_pre_scale_std": 0.19528700709342955,
"signal/advantage_std": 0.19528700709342955,
"signal/brier_reward/centered_abs_mean": 0.19260537028312683,
"signal/brier_reward/group_bin_occupancy": 0.8631944444444445,
"signal/brier_reward/group_std_mean": 0.24576567709445954,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01926053762435913,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01926053762435913,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04959237203001976,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08034891486167908,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0049592372961342335,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0049592372961342335,
"signal/format_reward/centered_abs_mean": 0.03552517332136631,
"signal/format_reward/group_bin_occupancy": 0.15694444444444444,
"signal/format_reward/group_std_mean": 0.06437588557600975,
"signal/format_reward/group_zero_std_frac": 0.7444444537162781,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017762586660683154,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.017762586660683154,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001982824504375458,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.670138888888889,
"signal/frontier_aurc_reward/group_std_mean": 0.0033466981258243322,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.478530877851881e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.478530877851881e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18281554132699968,
"signal/frontier_coverage_0/group_bin_occupancy": 0.867013888888889,
"signal/frontier_coverage_0/group_std_mean": 0.24726351499557495,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_1/centered_abs_mean": 0.18281554132699968,
"signal/frontier_coverage_1/group_bin_occupancy": 0.867013888888889,
"signal/frontier_coverage_1/group_std_mean": 0.24726351499557495,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_10/centered_abs_mean": 0.18281554132699968,
"signal/frontier_coverage_10/group_bin_occupancy": 0.867013888888889,
"signal/frontier_coverage_10/group_std_mean": 0.24726351499557495,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_15/centered_abs_mean": 0.18281554132699968,
"signal/frontier_coverage_15/group_bin_occupancy": 0.867013888888889,
"signal/frontier_coverage_15/group_std_mean": 0.24726351499557495,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_20/centered_abs_mean": 0.18281554132699968,
"signal/frontier_coverage_20/group_bin_occupancy": 0.867013888888889,
"signal/frontier_coverage_20/group_std_mean": 0.24726351499557495,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_25/centered_abs_mean": 0.18281554132699968,
"signal/frontier_coverage_25/group_bin_occupancy": 0.867013888888889,
"signal/frontier_coverage_25/group_std_mean": 0.24726351499557495,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_5/centered_abs_mean": 0.18281554132699968,
"signal/frontier_coverage_5/group_bin_occupancy": 0.867013888888889,
"signal/frontier_coverage_5/group_std_mean": 0.24726351499557495,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002285194396972656,
"signal/frontier_ece_reward/centered_abs_mean": 0.07174393385648728,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7809027777777777,
"signal/frontier_ece_reward/group_std_mean": 0.11735818088054657,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007174393441528082,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007174393441528082,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40669200420379636,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.46814131140708926,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04066920205950737,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04066920205950737,
"step": 45
},
{
"calibration/aurc": 0.39852565881023116,
"calibration/batch_distribution_entropy": 0.9787449958619818,
"calibration/batch_entropy_100bins": 0.9592369510418349,
"calibration/batch_entropy_10bins": 0.9787449958619818,
"calibration/batch_entropy_50bins": 0.971046332337011,
"calibration/batch_uniqueness": 0.9525960238680078,
"calibration/buffer_distribution_entropy": 0.7212969635124125,
"calibration/buffer_entropy_100bins": 0.6553290358103675,
"calibration/buffer_entropy_10bins": 0.7212969635124125,
"calibration/buffer_entropy_50bins": 0.7162547318725517,
"calibration/confidence_entropy": 0.49994193937653375,
"calibration/coverage@0%": 0.0005235602094240838,
"calibration/coverage@1%": 0.0005235602094240838,
"calibration/coverage@10%": 0.011218747375199485,
"calibration/coverage@15%": 0.011753506733488257,
"calibration/coverage@20%": 0.032012193497512484,
"calibration/coverage@25%": 0.06712624280218765,
"calibration/coverage@30%": 0.13888077152042816,
"calibration/coverage@5%": 0.0005235602094240838,
"calibration/ece": 0.20714599782061244,
"calibration/mean_confidence": 0.5138718686696668,
"calibration/prompt_uniqueness": 0.8887597240995356,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021180555555555557,
"completions/max_length": 3352.2,
"completions/max_terminated_length": 3352.2,
"completions/mean_length": 729.52119140625,
"completions/mean_terminated_length": 745.1978759765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 227.8,
"epoch": 0.11999850001874976,
"grad_norm": 0.0005338288610801101,
"learning_rate": 4.759036144578314e-06,
"loss": -0.0177,
"num_tokens": 98007757.0,
"reward": 0.9368434190750122,
"reward_std": 0.16865328848361968,
"rewards/accuracy_reward": 0.6310763955116272,
"rewards/brier_reward": 0.69019033908844,
"rewards/confidence_uniqueness_reward": 0.9318178653717041,
"rewards/format_reward": 0.9782118082046509,
"rewards/frontier_aurc_reward": -0.002340958220884204,
"rewards/frontier_coverage_0": -0.055156460218131545,
"rewards/frontier_coverage_1": -0.055156460218131545,
"rewards/frontier_coverage_10": -0.055156460218131545,
"rewards/frontier_coverage_15": -0.055156460218131545,
"rewards/frontier_coverage_20": -0.055156460218131545,
"rewards/frontier_coverage_25": -0.055156460218131545,
"rewards/frontier_coverage_5": -0.055156460218131545,
"rewards/frontier_ece_reward": -0.008883633697405458,
"rewards/frontier_entropy_batch_reward": -0.24257669150829314,
"signal/accuracy_reward/centered_abs_mean": 0.1930555522441864,
"signal/accuracy_reward/group_bin_occupancy": 0.215625,
"signal/accuracy_reward/group_std_mean": 0.2544387519359589,
"signal/accuracy_reward/group_zero_std_frac": 0.27499999701976774,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0965277761220932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0965277761220932,
"signal/advantage_abs_mean": 0.1274535983800888,
"signal/advantage_pre_scale_abs_mean": 0.1274535983800888,
"signal/advantage_pre_scale_std": 0.18669797778129577,
"signal/advantage_std": 0.18669797778129577,
"signal/brier_reward/centered_abs_mean": 0.23190079629421234,
"signal/brier_reward/group_bin_occupancy": 0.921875,
"signal/brier_reward/group_std_mean": 0.2818805932998657,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023190080001950265,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023190080001950265,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04411279484629631,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7649305555555556,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07408891320228576,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0044112796895205975,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0044112796895205975,
"signal/format_reward/centered_abs_mean": 0.035107421875,
"signal/format_reward/group_bin_occupancy": 0.15694444444444444,
"signal/format_reward/group_std_mean": 0.06355542615056038,
"signal/format_reward/group_zero_std_frac": 0.7444444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0175537109375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0175537109375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018331629456952215,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.646875,
"signal/frontier_aurc_reward/group_std_mean": 0.003077285923063755,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2914535657037048e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2914535657037048e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2667407602071762,
"signal/frontier_coverage_0/group_bin_occupancy": 0.9173611111111111,
"signal/frontier_coverage_0/group_std_mean": 0.33869033455848696,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_1/centered_abs_mean": 0.2667407602071762,
"signal/frontier_coverage_1/group_bin_occupancy": 0.9173611111111111,
"signal/frontier_coverage_1/group_std_mean": 0.33869033455848696,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_10/centered_abs_mean": 0.2667407602071762,
"signal/frontier_coverage_10/group_bin_occupancy": 0.9173611111111111,
"signal/frontier_coverage_10/group_std_mean": 0.33869033455848696,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_15/centered_abs_mean": 0.2667407602071762,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9173611111111111,
"signal/frontier_coverage_15/group_std_mean": 0.33869033455848696,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_20/centered_abs_mean": 0.2667407602071762,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9173611111111111,
"signal/frontier_coverage_20/group_std_mean": 0.33869033455848696,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_25/centered_abs_mean": 0.2667407602071762,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9173611111111111,
"signal/frontier_coverage_25/group_std_mean": 0.33869033455848696,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_5/centered_abs_mean": 0.2667407602071762,
"signal/frontier_coverage_5/group_bin_occupancy": 0.9173611111111111,
"signal/frontier_coverage_5/group_std_mean": 0.33869033455848696,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033342594746500255,
"signal/frontier_ece_reward/centered_abs_mean": 0.07847718596458435,
"signal/frontier_ece_reward/group_bin_occupancy": 0.871875,
"signal/frontier_ece_reward/group_std_mean": 0.11430413126945496,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007847718894481659,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007847718894481659,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33370028138160707,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7784722222222222,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40720880031585693,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03337002918124199,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03337002918124199,
"step": 50
},
{
"epoch": 0.11999850001874976,
"eval_calibration/aurc": 0.22663103748517832,
"eval_calibration/batch_distribution_entropy": 0.8944488094393327,
"eval_calibration/batch_entropy_100bins": 0.7018959966227308,
"eval_calibration/batch_entropy_10bins": 0.8944488094393327,
"eval_calibration/batch_entropy_50bins": 0.7678897018331208,
"eval_calibration/batch_uniqueness": 0.8925259603711412,
"eval_calibration/buffer_distribution_entropy": 0.7581864782785342,
"eval_calibration/buffer_entropy_100bins": 0.697479079611122,
"eval_calibration/buffer_entropy_10bins": 0.7581864782785342,
"eval_calibration/buffer_entropy_50bins": 0.7531417625731921,
"eval_calibration/confidence_entropy": 0.49303684207606624,
"eval_calibration/coverage@0%": 0.17893145161290322,
"eval_calibration/coverage@1%": 0.17893145161290322,
"eval_calibration/coverage@10%": 0.23168682795698925,
"eval_calibration/coverage@15%": 0.29939516129032256,
"eval_calibration/coverage@20%": 0.49227150537634407,
"eval_calibration/coverage@25%": 0.7928427419354839,
"eval_calibration/coverage@30%": 0.8991935483870966,
"eval_calibration/coverage@5%": 0.17893145161290322,
"eval_calibration/ece": 0.2323555386214126,
"eval_calibration/mean_confidence": 0.6104373585295977,
"eval_calibration/prompt_uniqueness": 0.8925259603711412,
"eval_completions/clipped_ratio": 0.022395833333333354,
"eval_completions/max_length": 1943.6666666666667,
"eval_completions/max_terminated_length": 1943.6666666666667,
"eval_completions/mean_length": 708.0866495768229,
"eval_completions/mean_terminated_length": 724.3104654947916,
"eval_completions/min_length": 0.0,
"eval_completions/min_terminated_length": 297.1666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 98007757.0,
"eval_reward": 0.8659678896268209,
"eval_reward_std": 0.26238663494586945,
"eval_rewards/accuracy_reward": 0.6319444477558136,
"eval_rewards/brier_reward": 0.736727903286616,
"eval_rewards/confidence_uniqueness_reward": 0.8684482177098592,
"eval_rewards/format_reward": 0.975694457689921,
"eval_rewards/frontier_aurc_reward": -0.0022891214466653764,
"eval_rewards/frontier_coverage_0": -0.01684247803253432,
"eval_rewards/frontier_coverage_1": -0.01684247803253432,
"eval_rewards/frontier_coverage_10": -0.01684247803253432,
"eval_rewards/frontier_coverage_15": -0.01684247803253432,
"eval_rewards/frontier_coverage_20": -0.01684247803253432,
"eval_rewards/frontier_coverage_25": -0.01684247803253432,
"eval_rewards/frontier_coverage_5": -0.01684247803253432,
"eval_rewards/frontier_ece_reward": 0.00702586160817494,
"eval_rewards/frontier_entropy_batch_reward": -0.975694457689921,
"eval_runtime": 211.0238,
"eval_samples_per_second": 4.739,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4508463541666667,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.48167944451173145,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22542317708333334,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22542317708333334,
"eval_signal/advantage_abs_mean": 0.22892138113578162,
"eval_signal/advantage_pre_scale_abs_mean": 0.22892138113578162,
"eval_signal/advantage_pre_scale_std": 0.2612771863738696,
"eval_signal/advantage_std": 0.2612771863738696,
"eval_signal/brier_reward/centered_abs_mean": 0.22721747557322183,
"eval_signal/brier_reward/group_bin_occupancy": 0.923611111111111,
"eval_signal/brier_reward/group_std_mean": 0.2780616382757823,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0227217481782039,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0227217481782039,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06945328476528327,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3958333333333333,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1254055512448152,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006945328554138541,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006945328554138541,
"eval_signal/format_reward/centered_abs_mean": 0.04589843765522043,
"eval_signal/format_reward/group_bin_occupancy": 0.19097222222222224,
"eval_signal/format_reward/group_std_mean": 0.10953563420722882,
"eval_signal/format_reward/group_zero_std_frac": 0.4722222338120143,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022949218827610213,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.022949218827610213,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0023004660033620894,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6736111111111112,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004148481646552682,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8755823829366516e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8755823829366516e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.22893314063549042,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.951388888888889,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3282311459382375,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.22893314063549042,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.951388888888889,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3282311459382375,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.22893314063549042,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.951388888888889,
"eval_signal/frontier_coverage_10/group_std_mean": 0.3282311459382375,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.22893314063549042,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.951388888888889,
"eval_signal/frontier_coverage_15/group_std_mean": 0.3282311459382375,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.22893314063549042,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.951388888888889,
"eval_signal/frontier_coverage_20/group_std_mean": 0.3282311459382375,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.22893314063549042,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.951388888888889,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3282311459382375,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.22893314063549042,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.951388888888889,
"eval_signal/frontier_coverage_5/group_std_mean": 0.3282311459382375,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028616644364471235,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05358493266006311,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8263888888888888,
"eval_signal/frontier_ece_reward/group_std_mean": 0.08428221692641576,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005358493188396096,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005358493188396096,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.04589843765522043,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.19097222222222224,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.10953563420722882,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4722222338120143,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004589843874176343,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.004589843874176343,
"eval_steps_per_second": 0.028,
"step": 50
},
{
"calibration/aurc": 0.2683659949197693,
"calibration/batch_distribution_entropy": 0.9526285003912491,
"calibration/batch_entropy_100bins": 0.9450206874209816,
"calibration/batch_entropy_10bins": 0.9526285003912491,
"calibration/batch_entropy_50bins": 0.9561908449747939,
"calibration/batch_uniqueness": 0.9479367620902497,
"calibration/buffer_distribution_entropy": 0.77331458113982,
"calibration/buffer_entropy_100bins": 0.7195050284353803,
"calibration/buffer_entropy_10bins": 0.77331458113982,
"calibration/buffer_entropy_50bins": 0.7715357463295008,
"calibration/confidence_entropy": 0.5168335151397986,
"calibration/coverage@0%": 0.019592974315080607,
"calibration/coverage@1%": 0.019592974315080607,
"calibration/coverage@10%": 0.033857972511265,
"calibration/coverage@15%": 0.07343580892287452,
"calibration/coverage@20%": 0.34841750738119537,
"calibration/coverage@25%": 0.5284259703915656,
"calibration/coverage@30%": 0.6675125886120853,
"calibration/coverage@5%": 0.028036246082890633,
"calibration/ece": 0.17036230442798692,
"calibration/mean_confidence": 0.6091539475261136,
"calibration/prompt_uniqueness": 0.8819837383869557,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021354166666666674,
"completions/max_length": 3492.2,
"completions/max_terminated_length": 3492.2,
"completions/mean_length": 733.5085205078125,
"completions/mean_terminated_length": 749.6647705078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 229.8,
"epoch": 0.13199835002062474,
"grad_norm": 0.0005942412535659969,
"learning_rate": 4.60843373493976e-06,
"loss": -0.0179,
"num_tokens": 109538351.0,
"reward": 0.9447301387786865,
"reward_std": 0.1707069009542465,
"rewards/accuracy_reward": 0.64296875,
"rewards/brier_reward": 0.728516948223114,
"rewards/confidence_uniqueness_reward": 0.9280948400497436,
"rewards/format_reward": 0.9776041626930236,
"rewards/frontier_aurc_reward": -0.002217937121167779,
"rewards/frontier_coverage_0": -0.0328390815295279,
"rewards/frontier_coverage_1": -0.0328390815295279,
"rewards/frontier_coverage_10": -0.0328390815295279,
"rewards/frontier_coverage_15": -0.0328390815295279,
"rewards/frontier_coverage_20": -0.0328390815295279,
"rewards/frontier_coverage_25": -0.0328390815295279,
"rewards/frontier_coverage_5": -0.0328390815295279,
"rewards/frontier_ece_reward": 0.004757384280674159,
"rewards/frontier_entropy_batch_reward": -0.2879209280014038,
"signal/accuracy_reward/centered_abs_mean": 0.19408094584941865,
"signal/accuracy_reward/group_bin_occupancy": 0.21388888888888893,
"signal/accuracy_reward/group_std_mean": 0.2528954565525055,
"signal/accuracy_reward/group_zero_std_frac": 0.28888889253139494,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09704047292470933,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09704047292470933,
"signal/advantage_abs_mean": 0.12855922281742097,
"signal/advantage_pre_scale_abs_mean": 0.12855922281742097,
"signal/advantage_pre_scale_std": 0.19316387176513672,
"signal/advantage_std": 0.19316387176513672,
"signal/brier_reward/centered_abs_mean": 0.20376710891723632,
"signal/brier_reward/group_bin_occupancy": 0.9003472222222223,
"signal/brier_reward/group_std_mean": 0.2530288904905319,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020376710593700408,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020376710593700408,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045846784859895705,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7788194444444445,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07546174824237824,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004584678448736668,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004584678448736668,
"signal/format_reward/centered_abs_mean": 0.03585069477558136,
"signal/format_reward/group_bin_occupancy": 0.15659722222222222,
"signal/format_reward/group_std_mean": 0.06373232007026672,
"signal/format_reward/group_zero_std_frac": 0.7472222328186036,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01792534738779068,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01792534738779068,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020224370528012514,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6652777777777777,
"signal/frontier_aurc_reward/group_std_mean": 0.0033281259704381226,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5280463160015642e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5280463160015642e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20073602795600892,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_0/group_std_mean": 0.26514851450920107,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_1/centered_abs_mean": 0.20073602795600892,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_1/group_std_mean": 0.26514851450920107,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_10/centered_abs_mean": 0.20073602795600892,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_10/group_std_mean": 0.26514851450920107,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_15/centered_abs_mean": 0.20073602795600892,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_15/group_std_mean": 0.26514851450920107,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_20/centered_abs_mean": 0.20073602795600892,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_20/group_std_mean": 0.26514851450920107,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_25/centered_abs_mean": 0.20073602795600892,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_25/group_std_mean": 0.26514851450920107,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_5/centered_abs_mean": 0.20073602795600892,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_5/group_std_mean": 0.26514851450920107,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025092002935707567,
"signal/frontier_ece_reward/centered_abs_mean": 0.05378806218504906,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8496527777777778,
"signal/frontier_ece_reward/group_std_mean": 0.08123364597558975,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005378806497901678,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005378806497901678,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3519828081130981,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7847222222222223,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42118882536888125,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035198282450437546,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035198282450437546,
"step": 55
},
{
"calibration/aurc": 0.34125538207514033,
"calibration/batch_distribution_entropy": 0.972611014708105,
"calibration/batch_entropy_100bins": 0.9550453951098585,
"calibration/batch_entropy_10bins": 0.972611014708105,
"calibration/batch_entropy_50bins": 0.9672234348309336,
"calibration/batch_uniqueness": 0.9504243423453544,
"calibration/buffer_distribution_entropy": 0.8040459125276394,
"calibration/buffer_entropy_100bins": 0.7606849562747422,
"calibration/buffer_entropy_10bins": 0.8040459125276394,
"calibration/buffer_entropy_50bins": 0.8061715790901408,
"calibration/confidence_entropy": 0.5298289411990107,
"calibration/coverage@0%": 0.0021136100291773113,
"calibration/coverage@1%": 0.0021136100291773113,
"calibration/coverage@10%": 0.019391096940172076,
"calibration/coverage@15%": 0.09173199903161312,
"calibration/coverage@20%": 0.25987322029879684,
"calibration/coverage@25%": 0.29309997918512537,
"calibration/coverage@30%": 0.4806800713830787,
"calibration/coverage@5%": 0.0021136100291773113,
"calibration/ece": 0.19323905598963226,
"calibration/mean_confidence": 0.544135561508949,
"calibration/prompt_uniqueness": 0.8866490591558446,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.023177083333333348,
"completions/max_length": 3648.6,
"completions/max_terminated_length": 3648.6,
"completions/mean_length": 736.1137329101563,
"completions/mean_terminated_length": 753.6991821289063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 196.8,
"epoch": 0.14399820002249972,
"grad_norm": 0.00039395506610162556,
"learning_rate": 4.457831325301205e-06,
"loss": -0.0199,
"num_tokens": 121114957.0,
"reward": 0.9317705035209656,
"reward_std": 0.1709751844406128,
"rewards/accuracy_reward": 0.6081597208976746,
"rewards/brier_reward": 0.7243224620819092,
"rewards/confidence_uniqueness_reward": 0.9290532946586609,
"rewards/format_reward": 0.9764756917953491,
"rewards/frontier_aurc_reward": -0.0022094239946454765,
"rewards/frontier_coverage_0": -0.014207637775689364,
"rewards/frontier_coverage_1": -0.014207637775689364,
"rewards/frontier_coverage_10": -0.014207637775689364,
"rewards/frontier_coverage_15": -0.014207637775689364,
"rewards/frontier_coverage_20": -0.014207637775689364,
"rewards/frontier_coverage_25": -0.014207637775689364,
"rewards/frontier_coverage_5": -0.014207637775689364,
"rewards/frontier_ece_reward": 0.005637143552303314,
"rewards/frontier_entropy_batch_reward": -0.25177713930606843,
"signal/accuracy_reward/centered_abs_mean": 0.20734592378139496,
"signal/accuracy_reward/group_bin_occupancy": 0.2170138888888889,
"signal/accuracy_reward/group_std_mean": 0.26776798665523527,
"signal/accuracy_reward/group_zero_std_frac": 0.26388888657093046,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10367296189069748,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10367296189069748,
"signal/advantage_abs_mean": 0.12827864587306975,
"signal/advantage_pre_scale_abs_mean": 0.12827864587306975,
"signal/advantage_pre_scale_std": 0.19161723256111146,
"signal/advantage_std": 0.19161723256111146,
"signal/brier_reward/centered_abs_mean": 0.20366644859313965,
"signal/brier_reward/group_bin_occupancy": 0.90625,
"signal/brier_reward/group_std_mean": 0.251072758436203,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020366644859313963,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020366644859313963,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04744342863559723,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7736111111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07852019146084785,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004744342807680368,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004744342807680368,
"signal/format_reward/centered_abs_mean": 0.03862304724752903,
"signal/format_reward/group_bin_occupancy": 0.15868055555555557,
"signal/format_reward/group_std_mean": 0.06846961379051208,
"signal/format_reward/group_zero_std_frac": 0.7305555462837219,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.019311523623764516,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.019311523623764516,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018203360494226217,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6711805555555556,
"signal/frontier_aurc_reward/group_std_mean": 0.00309151909314096,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.275420156365726e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.275420156365726e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.23083798289299012,
"signal/frontier_coverage_0/group_bin_occupancy": 0.898611111111111,
"signal/frontier_coverage_0/group_std_mean": 0.30217787623405457,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_1/centered_abs_mean": 0.23083798289299012,
"signal/frontier_coverage_1/group_bin_occupancy": 0.898611111111111,
"signal/frontier_coverage_1/group_std_mean": 0.30217787623405457,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_10/centered_abs_mean": 0.23083798289299012,
"signal/frontier_coverage_10/group_bin_occupancy": 0.898611111111111,
"signal/frontier_coverage_10/group_std_mean": 0.30217787623405457,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_15/centered_abs_mean": 0.23083798289299012,
"signal/frontier_coverage_15/group_bin_occupancy": 0.898611111111111,
"signal/frontier_coverage_15/group_std_mean": 0.30217787623405457,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_20/centered_abs_mean": 0.23083798289299012,
"signal/frontier_coverage_20/group_bin_occupancy": 0.898611111111111,
"signal/frontier_coverage_20/group_std_mean": 0.30217787623405457,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_25/centered_abs_mean": 0.23083798289299012,
"signal/frontier_coverage_25/group_bin_occupancy": 0.898611111111111,
"signal/frontier_coverage_25/group_std_mean": 0.30217787623405457,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_5/centered_abs_mean": 0.23083798289299012,
"signal/frontier_coverage_5/group_bin_occupancy": 0.898611111111111,
"signal/frontier_coverage_5/group_std_mean": 0.30217787623405457,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002885474869981408,
"signal/frontier_ece_reward/centered_abs_mean": 0.05384446457028389,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8677083333333334,
"signal/frontier_ece_reward/group_std_mean": 0.08003931492567062,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005384446494281292,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005384446494281292,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33434916734695436,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7777777777777778,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4062751352787018,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03343491479754448,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03343491479754448,
"step": 60
},
{
"calibration/aurc": 0.23656779693400476,
"calibration/batch_distribution_entropy": 0.9777875846527511,
"calibration/batch_entropy_100bins": 0.9566050467615526,
"calibration/batch_entropy_10bins": 0.9777875846527511,
"calibration/batch_entropy_50bins": 0.9694041980201531,
"calibration/batch_uniqueness": 0.951238184119107,
"calibration/buffer_distribution_entropy": 0.8348047983245668,
"calibration/buffer_entropy_100bins": 0.7944202847632227,
"calibration/buffer_entropy_10bins": 0.8348047983245668,
"calibration/buffer_entropy_50bins": 0.8353967415859247,
"calibration/confidence_entropy": 0.5115842716518644,
"calibration/coverage@0%": 0.010110893193929047,
"calibration/coverage@1%": 0.010110893193929047,
"calibration/coverage@10%": 0.18960810918728915,
"calibration/coverage@15%": 0.3931437969466637,
"calibration/coverage@20%": 0.5138705409531458,
"calibration/coverage@25%": 0.6643819070694328,
"calibration/coverage@30%": 0.7248,
"calibration/coverage@5%": 0.044939389236145405,
"calibration/ece": 0.1552912149271485,
"calibration/mean_confidence": 0.5501857656980181,
"calibration/prompt_uniqueness": 0.8891747813176986,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.022743055555555537,
"completions/max_length": 3102.6,
"completions/max_terminated_length": 3102.6,
"completions/mean_length": 722.5470581054688,
"completions/mean_terminated_length": 739.3680297851563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.0,
"epoch": 0.1559980500243747,
"grad_norm": 0.00044656230602413416,
"learning_rate": 4.307228915662651e-06,
"loss": -0.0191,
"num_tokens": 132532747.0,
"reward": 0.9545995354652405,
"reward_std": 0.16012048721313477,
"rewards/accuracy_reward": 0.6457465291023254,
"rewards/brier_reward": 0.7412284135818481,
"rewards/confidence_uniqueness_reward": 0.9310332775115967,
"rewards/format_reward": 0.9772569417953492,
"rewards/frontier_aurc_reward": -0.0018141154432669282,
"rewards/frontier_coverage_0": -0.018469791370444,
"rewards/frontier_coverage_1": -0.018469791370444,
"rewards/frontier_coverage_10": -0.018469791370444,
"rewards/frontier_coverage_15": -0.018469791370444,
"rewards/frontier_coverage_20": -0.018469791370444,
"rewards/frontier_coverage_25": -0.018469791370444,
"rewards/frontier_coverage_5": -0.018469791370444,
"rewards/frontier_ece_reward": 0.006174688460305333,
"rewards/frontier_entropy_batch_reward": -0.23107051253318786,
"signal/accuracy_reward/centered_abs_mean": 0.17658962607383727,
"signal/accuracy_reward/group_bin_occupancy": 0.21319444444444446,
"signal/accuracy_reward/group_std_mean": 0.2395526260137558,
"signal/accuracy_reward/group_zero_std_frac": 0.2944444417953491,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08829481303691863,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08829481303691863,
"signal/advantage_abs_mean": 0.11682901531457901,
"signal/advantage_pre_scale_abs_mean": 0.11682901531457901,
"signal/advantage_pre_scale_std": 0.18256474137306214,
"signal/advantage_std": 0.18256474137306214,
"signal/brier_reward/centered_abs_mean": 0.19553665220737457,
"signal/brier_reward/group_bin_occupancy": 0.8847222222222223,
"signal/brier_reward/group_std_mean": 0.2446680635213852,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019553666189312934,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019553666189312934,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04568360410630703,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7583333333333333,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07737514227628708,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004568360652774573,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004568360652774573,
"signal/format_reward/centered_abs_mean": 0.03725043348968029,
"signal/format_reward/group_bin_occupancy": 0.1590277777777778,
"signal/format_reward/group_std_mean": 0.06770127713680267,
"signal/format_reward/group_zero_std_frac": 0.7277777791023254,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.018625216744840144,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.018625216744840144,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014472146751359106,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6895833333333333,
"signal/frontier_aurc_reward/group_std_mean": 0.0024238450918346644,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8090184312313795e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8090184312313795e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.22579068541526795,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8784722222222221,
"signal/frontier_coverage_0/group_std_mean": 0.2969242215156555,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_1/centered_abs_mean": 0.22579068541526795,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8784722222222221,
"signal/frontier_coverage_1/group_std_mean": 0.2969242215156555,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_10/centered_abs_mean": 0.22579068541526795,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8784722222222221,
"signal/frontier_coverage_10/group_std_mean": 0.2969242215156555,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_15/centered_abs_mean": 0.22579068541526795,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8784722222222221,
"signal/frontier_coverage_15/group_std_mean": 0.2969242215156555,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_20/centered_abs_mean": 0.22579068541526795,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8784722222222221,
"signal/frontier_coverage_20/group_std_mean": 0.2969242215156555,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_25/centered_abs_mean": 0.22579068541526795,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8784722222222221,
"signal/frontier_coverage_25/group_std_mean": 0.2969242215156555,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_5/centered_abs_mean": 0.22579068541526795,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8784722222222221,
"signal/frontier_coverage_5/group_std_mean": 0.2969242215156555,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028223836328834296,
"signal/frontier_ece_reward/centered_abs_mean": 0.050039660185575485,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8552083333333333,
"signal/frontier_ece_reward/group_std_mean": 0.07259590029716492,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005003966204822063,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005003966204822063,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3180016040802002,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7805555555555556,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39476526975631715,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03180016092956066,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03180016092956066,
"step": 65
},
{
"calibration/aurc": 0.2948536564114072,
"calibration/batch_distribution_entropy": 0.9733400726258928,
"calibration/batch_entropy_100bins": 0.9536061745510522,
"calibration/batch_entropy_10bins": 0.9733400726258928,
"calibration/batch_entropy_50bins": 0.9674596609580579,
"calibration/batch_uniqueness": 0.949705160752441,
"calibration/buffer_distribution_entropy": 0.8540476081873998,
"calibration/buffer_entropy_100bins": 0.8192057037753948,
"calibration/buffer_entropy_10bins": 0.8540476081873998,
"calibration/buffer_entropy_50bins": 0.8557242505965151,
"calibration/confidence_entropy": 0.49067170223761797,
"calibration/coverage@0%": 0.022465164274228008,
"calibration/coverage@1%": 0.022465164274228008,
"calibration/coverage@10%": 0.04398193654670511,
"calibration/coverage@15%": 0.11319772520337099,
"calibration/coverage@20%": 0.20620307161185872,
"calibration/coverage@25%": 0.3230422460624132,
"calibration/coverage@30%": 0.46851682623276936,
"calibration/coverage@5%": 0.032168668317354696,
"calibration/ece": 0.16806239032062145,
"calibration/mean_confidence": 0.5722495755231523,
"calibration/prompt_uniqueness": 0.8687714538093431,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 3525.8,
"completions/max_terminated_length": 3525.8,
"completions/mean_length": 725.2604125976562,
"completions/mean_terminated_length": 742.6922485351563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.6,
"epoch": 0.16799790002624967,
"grad_norm": 0.00043501320760697126,
"learning_rate": 4.156626506024097e-06,
"loss": -0.0206,
"num_tokens": 143965891.0,
"reward": 0.9423856019973755,
"reward_std": 0.16414594948291777,
"rewards/accuracy_reward": 0.6236111164093018,
"rewards/brier_reward": 0.7365549683570862,
"rewards/confidence_uniqueness_reward": 0.9279781222343445,
"rewards/format_reward": 0.9764757037162781,
"rewards/frontier_aurc_reward": -0.0020766297122463582,
"rewards/frontier_coverage_0": 0.0018009988591074944,
"rewards/frontier_coverage_1": 0.0018009988591074944,
"rewards/frontier_coverage_10": 0.0018009988591074944,
"rewards/frontier_coverage_15": 0.0018009988591074944,
"rewards/frontier_coverage_20": 0.0018009988591074944,
"rewards/frontier_coverage_25": 0.0018009988591074944,
"rewards/frontier_coverage_5": 0.0018009988591074944,
"rewards/frontier_ece_reward": 0.009368815366178751,
"rewards/frontier_entropy_batch_reward": -0.25179632306098937,
"signal/accuracy_reward/centered_abs_mean": 0.1868381083011627,
"signal/accuracy_reward/group_bin_occupancy": 0.21284722222222224,
"signal/accuracy_reward/group_std_mean": 0.24697498977184296,
"signal/accuracy_reward/group_zero_std_frac": 0.2972222238779068,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09341905415058135,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09341905415058135,
"signal/advantage_abs_mean": 0.12035283744335175,
"signal/advantage_pre_scale_abs_mean": 0.12035283744335175,
"signal/advantage_pre_scale_std": 0.1880349338054657,
"signal/advantage_std": 0.1880349338054657,
"signal/brier_reward/centered_abs_mean": 0.20086349546909332,
"signal/brier_reward/group_bin_occupancy": 0.8625,
"signal/brier_reward/group_std_mean": 0.2517173230648041,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020086349919438362,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020086349919438362,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05000351741909981,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7725694444444444,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08059784770011902,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005000351928174496,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005000351928174496,
"signal/format_reward/centered_abs_mean": 0.03988172635436058,
"signal/format_reward/group_bin_occupancy": 0.15659722222222222,
"signal/format_reward/group_std_mean": 0.06865522116422654,
"signal/format_reward/group_zero_std_frac": 0.7472222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01994086317718029,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01994086317718029,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018553413217887282,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6861111111111111,
"signal/frontier_aurc_reward/group_std_mean": 0.0029855409171432256,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3191767104435713e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3191767104435713e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.22352704107761384,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8545138888888889,
"signal/frontier_coverage_0/group_std_mean": 0.29782513380050657,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_1/centered_abs_mean": 0.22352704107761384,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8545138888888889,
"signal/frontier_coverage_1/group_std_mean": 0.29782513380050657,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_10/centered_abs_mean": 0.22352704107761384,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8545138888888889,
"signal/frontier_coverage_10/group_std_mean": 0.29782513380050657,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_15/centered_abs_mean": 0.22352704107761384,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8545138888888889,
"signal/frontier_coverage_15/group_std_mean": 0.29782513380050657,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_20/centered_abs_mean": 0.22352704107761384,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8545138888888889,
"signal/frontier_coverage_20/group_std_mean": 0.29782513380050657,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_25/centered_abs_mean": 0.22352704107761384,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8545138888888889,
"signal/frontier_coverage_25/group_std_mean": 0.29782513380050657,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_5/centered_abs_mean": 0.22352704107761384,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8545138888888889,
"signal/frontier_coverage_5/group_std_mean": 0.29782513380050657,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027940880972892046,
"signal/frontier_ece_reward/centered_abs_mean": 0.04868664965033531,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8552083333333333,
"signal/frontier_ece_reward/group_std_mean": 0.07001925408840179,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004868665337562561,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004868665337562561,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3257962942123413,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7805555555555557,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4016845703125,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03257962986826897,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03257962986826897,
"step": 70
},
{
"calibration/aurc": 0.21711738393930857,
"calibration/batch_distribution_entropy": 0.9600224396836108,
"calibration/batch_entropy_100bins": 0.9466081240419243,
"calibration/batch_entropy_10bins": 0.9600224396836108,
"calibration/batch_entropy_50bins": 0.9598622254467092,
"calibration/batch_uniqueness": 0.9482612156029203,
"calibration/buffer_distribution_entropy": 0.868126624309034,
"calibration/buffer_entropy_100bins": 0.8390208000202177,
"calibration/buffer_entropy_10bins": 0.868126624309034,
"calibration/buffer_entropy_50bins": 0.8714659369493593,
"calibration/confidence_entropy": 0.5153322298773169,
"calibration/coverage@0%": 0.042552211392207835,
"calibration/coverage@1%": 0.042552211392207835,
"calibration/coverage@10%": 0.16360546316683083,
"calibration/coverage@15%": 0.39607171121408147,
"calibration/coverage@20%": 0.5625821359146268,
"calibration/coverage@25%": 0.6644978024470013,
"calibration/coverage@30%": 0.7292566187847698,
"calibration/coverage@5%": 0.05221326791108637,
"calibration/ece": 0.1500837966971388,
"calibration/mean_confidence": 0.5832129899789663,
"calibration/prompt_uniqueness": 0.8774171238232608,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01753472222222221,
"completions/max_length": 3559.8,
"completions/max_terminated_length": 3559.8,
"completions/mean_length": 735.5065185546875,
"completions/mean_terminated_length": 748.6990966796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 243.4,
"epoch": 0.17999775002812465,
"grad_norm": 0.0004371130489744246,
"learning_rate": 4.006024096385543e-06,
"loss": -0.0143,
"num_tokens": 155503822.0,
"reward": 0.9730815052986145,
"reward_std": 0.1551417291164398,
"rewards/accuracy_reward": 0.6793402791023254,
"rewards/brier_reward": 0.7635630369186401,
"rewards/confidence_uniqueness_reward": 0.9331399917602539,
"rewards/format_reward": 0.9823784708976746,
"rewards/frontier_aurc_reward": -0.0015793633414432407,
"rewards/frontier_coverage_0": -0.01739480420947075,
"rewards/frontier_coverage_1": -0.01739480420947075,
"rewards/frontier_coverage_10": -0.01739480420947075,
"rewards/frontier_coverage_15": -0.01739480420947075,
"rewards/frontier_coverage_20": -0.01739480420947075,
"rewards/frontier_coverage_25": -0.01739480420947075,
"rewards/frontier_coverage_5": -0.01739480420947075,
"rewards/frontier_ece_reward": 0.008008561190217733,
"rewards/frontier_entropy_batch_reward": -0.267072793841362,
"signal/accuracy_reward/centered_abs_mean": 0.18628471791744233,
"signal/accuracy_reward/group_bin_occupancy": 0.21180555555555558,
"signal/accuracy_reward/group_std_mean": 0.24508444964885712,
"signal/accuracy_reward/group_zero_std_frac": 0.30555555522441863,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09314235895872117,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09314235895872117,
"signal/advantage_abs_mean": 0.11387252360582352,
"signal/advantage_pre_scale_abs_mean": 0.11387252360582352,
"signal/advantage_pre_scale_std": 0.1787475287914276,
"signal/advantage_std": 0.1787475287914276,
"signal/brier_reward/centered_abs_mean": 0.18025039732456208,
"signal/brier_reward/group_bin_occupancy": 0.8708333333333332,
"signal/brier_reward/group_std_mean": 0.22753545939922332,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01802504062652588,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01802504062652588,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04140819758176804,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7996527777777779,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06981581598520278,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004140820214524865,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004140820214524865,
"signal/format_reward/centered_abs_mean": 0.03043077252805233,
"signal/format_reward/group_bin_occupancy": 0.15416666666666667,
"signal/format_reward/group_std_mean": 0.05725453943014145,
"signal/format_reward/group_zero_std_frac": 0.7666666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015215386264026165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.015215386264026165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014222318306565285,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7027777777777777,
"signal/frontier_aurc_reward/group_std_mean": 0.002294929837808013,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7777897664927877e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7777897664927877e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.21255133748054506,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8583333333333332,
"signal/frontier_coverage_0/group_std_mean": 0.2832107603549957,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_1/centered_abs_mean": 0.21255133748054506,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8583333333333332,
"signal/frontier_coverage_1/group_std_mean": 0.2832107603549957,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_10/centered_abs_mean": 0.21255133748054506,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8583333333333332,
"signal/frontier_coverage_10/group_std_mean": 0.2832107603549957,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_15/centered_abs_mean": 0.21255133748054506,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8583333333333332,
"signal/frontier_coverage_15/group_std_mean": 0.2832107603549957,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_20/centered_abs_mean": 0.21255133748054506,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8583333333333332,
"signal/frontier_coverage_20/group_std_mean": 0.2832107603549957,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_25/centered_abs_mean": 0.21255133748054506,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8583333333333332,
"signal/frontier_coverage_25/group_std_mean": 0.2832107603549957,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_5/centered_abs_mean": 0.21255133748054506,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8583333333333332,
"signal/frontier_coverage_5/group_std_mean": 0.2832107603549957,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002656891755759716,
"signal/frontier_ece_reward/centered_abs_mean": 0.042948073148727416,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8493055555555555,
"signal/frontier_ece_reward/group_std_mean": 0.06208599209785461,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004294807370752096,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004294807370752096,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3377181708812714,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.78125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4080219030380249,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033771815896034243,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033771815896034243,
"step": 75
},
{
"calibration/aurc": 0.19194942395941994,
"calibration/batch_distribution_entropy": 0.9505285940513144,
"calibration/batch_entropy_100bins": 0.9470248836010062,
"calibration/batch_entropy_10bins": 0.9505285940513144,
"calibration/batch_entropy_50bins": 0.9568973139463551,
"calibration/batch_uniqueness": 0.9461827633448667,
"calibration/buffer_distribution_entropy": 0.8791427141187192,
"calibration/buffer_entropy_100bins": 0.8552186845109102,
"calibration/buffer_entropy_10bins": 0.8791427141187192,
"calibration/buffer_entropy_50bins": 0.884118126026354,
"calibration/confidence_entropy": 0.4923999060736496,
"calibration/coverage@0%": 0.026547209660839854,
"calibration/coverage@1%": 0.026547209660839854,
"calibration/coverage@10%": 0.2988454440806766,
"calibration/coverage@15%": 0.41392928557371744,
"calibration/coverage@20%": 0.5923071216089244,
"calibration/coverage@25%": 0.7086820744187616,
"calibration/coverage@30%": 0.8250354274917335,
"calibration/coverage@5%": 0.10274072601183652,
"calibration/ece": 0.18467594070419555,
"calibration/mean_confidence": 0.616226704959489,
"calibration/prompt_uniqueness": 0.8645967746439295,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021788194444444443,
"completions/max_length": 3473.2,
"completions/max_terminated_length": 3473.2,
"completions/mean_length": 765.4503540039062,
"completions/mean_terminated_length": 782.624853515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 227.0,
"epoch": 0.19199760002999963,
"grad_norm": 0.00039678241591900587,
"learning_rate": 3.855421686746989e-06,
"loss": -0.0185,
"num_tokens": 167375090.0,
"reward": 0.9544713973999024,
"reward_std": 0.15620069205760956,
"rewards/accuracy_reward": 0.6460069417953491,
"rewards/brier_reward": 0.7477240562438965,
"rewards/confidence_uniqueness_reward": 0.9298016190528869,
"rewards/format_reward": 0.978124988079071,
"rewards/frontier_aurc_reward": -0.0018646372482180595,
"rewards/frontier_coverage_0": -0.009632312413305043,
"rewards/frontier_coverage_1": -0.009632312413305043,
"rewards/frontier_coverage_10": -0.009632312413305043,
"rewards/frontier_coverage_15": -0.009632312413305043,
"rewards/frontier_coverage_20": -0.009632312413305043,
"rewards/frontier_coverage_25": -0.009632312413305043,
"rewards/frontier_coverage_5": -0.009632312413305043,
"rewards/frontier_ece_reward": 0.008436152525246144,
"rewards/frontier_entropy_batch_reward": -0.25324631929397584,
"signal/accuracy_reward/centered_abs_mean": 0.18125,
"signal/accuracy_reward/group_bin_occupancy": 0.21354166666666666,
"signal/accuracy_reward/group_std_mean": 0.24379155337810515,
"signal/accuracy_reward/group_zero_std_frac": 0.29166666865348817,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.090625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.090625,
"signal/advantage_abs_mean": 0.11512753665447235,
"signal/advantage_pre_scale_abs_mean": 0.11512753665447235,
"signal/advantage_pre_scale_std": 0.17972079813480377,
"signal/advantage_std": 0.17972079813480377,
"signal/brier_reward/centered_abs_mean": 0.18024792075157164,
"signal/brier_reward/group_bin_occupancy": 0.8690972222222222,
"signal/brier_reward/group_std_mean": 0.22731645703315734,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018024792522192003,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.018024792522192003,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04387593828141689,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.804861111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06978548243641854,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004387593921273946,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004387593921273946,
"signal/format_reward/centered_abs_mean": 0.03373480923473835,
"signal/format_reward/group_bin_occupancy": 0.15243055555555557,
"signal/format_reward/group_std_mean": 0.057917628437280655,
"signal/format_reward/group_zero_std_frac": 0.7805555701255799,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016867404617369176,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.016867404617369176,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017565070651471616,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6854166666666667,
"signal/frontier_aurc_reward/group_std_mean": 0.0028648764360696076,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1956339696771466e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1956339696771466e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20747113823890687,
"signal/frontier_coverage_0/group_bin_occupancy": 0.85,
"signal/frontier_coverage_0/group_std_mean": 0.27563032507896423,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_1/centered_abs_mean": 0.20747113823890687,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85,
"signal/frontier_coverage_1/group_std_mean": 0.27563032507896423,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_10/centered_abs_mean": 0.20747113823890687,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85,
"signal/frontier_coverage_10/group_std_mean": 0.27563032507896423,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_15/centered_abs_mean": 0.20747113823890687,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
"signal/frontier_coverage_15/group_std_mean": 0.27563032507896423,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_20/centered_abs_mean": 0.20747113823890687,
"signal/frontier_coverage_20/group_bin_occupancy": 0.85,
"signal/frontier_coverage_20/group_std_mean": 0.27563032507896423,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_25/centered_abs_mean": 0.20747113823890687,
"signal/frontier_coverage_25/group_bin_occupancy": 0.85,
"signal/frontier_coverage_25/group_std_mean": 0.27563032507896423,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_5/centered_abs_mean": 0.20747113823890687,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85,
"signal/frontier_coverage_5/group_std_mean": 0.27563032507896423,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025933892466127872,
"signal/frontier_ece_reward/centered_abs_mean": 0.04286099076271057,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8486111111111111,
"signal/frontier_ece_reward/group_std_mean": 0.06143382340669632,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004286099225282669,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004286099225282669,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32150877714157106,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763888888888889,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39428759813308717,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03215087540447712,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03215087540447712,
"step": 80
},
{
"calibration/aurc": 0.20464321287276627,
"calibration/batch_distribution_entropy": 0.9734846034862557,
"calibration/batch_entropy_100bins": 0.9582855024671899,
"calibration/batch_entropy_10bins": 0.9734846034862557,
"calibration/batch_entropy_50bins": 0.9687551046359546,
"calibration/batch_uniqueness": 0.9506750992026983,
"calibration/buffer_distribution_entropy": 0.8894459259708161,
"calibration/buffer_entropy_100bins": 0.8693461376608582,
"calibration/buffer_entropy_10bins": 0.8894459259708161,
"calibration/buffer_entropy_50bins": 0.8952655505976255,
"calibration/confidence_entropy": 0.49973062168606786,
"calibration/coverage@0%": 0.017135361002953733,
"calibration/coverage@1%": 0.017135361002953733,
"calibration/coverage@10%": 0.19827927770527864,
"calibration/coverage@15%": 0.34445682881222645,
"calibration/coverage@20%": 0.4741548295452036,
"calibration/coverage@25%": 0.7317244536788657,
"calibration/coverage@30%": 0.83289196155607,
"calibration/coverage@5%": 0.07288281577479874,
"calibration/ece": 0.1435432468710961,
"calibration/mean_confidence": 0.530408786754918,
"calibration/prompt_uniqueness": 0.8742536493030706,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01605902777777779,
"completions/max_length": 3906.4,
"completions/max_terminated_length": 3906.4,
"completions/mean_length": 765.7278686523438,
"completions/mean_terminated_length": 778.280224609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 265.2,
"epoch": 0.2039974500318746,
"grad_norm": 0.00038754488923586905,
"learning_rate": 3.7048192771084342e-06,
"loss": -0.0137,
"num_tokens": 179283475.0,
"reward": 0.9706011414527893,
"reward_std": 0.15045669674873352,
"rewards/accuracy_reward": 0.6691840291023254,
"rewards/brier_reward": 0.7690122842788696,
"rewards/confidence_uniqueness_reward": 0.9347800016403198,
"rewards/format_reward": 0.9837673544883728,
"rewards/frontier_aurc_reward": -0.001625478290952742,
"rewards/frontier_coverage_0": -0.0016857189650181681,
"rewards/frontier_coverage_1": -0.0016857189650181681,
"rewards/frontier_coverage_10": -0.0016857189650181681,
"rewards/frontier_coverage_15": -0.0016857189650181681,
"rewards/frontier_coverage_20": -0.0016857189650181681,
"rewards/frontier_coverage_25": -0.0016857189650181681,
"rewards/frontier_coverage_5": -0.0016857189650181681,
"rewards/frontier_ece_reward": 0.010668071359395981,
"rewards/frontier_entropy_batch_reward": -0.2715276062488556,
"signal/accuracy_reward/centered_abs_mean": 0.18636610209941865,
"signal/accuracy_reward/group_bin_occupancy": 0.21215277777777777,
"signal/accuracy_reward/group_std_mean": 0.2449056774377823,
"signal/accuracy_reward/group_zero_std_frac": 0.30277777910232545,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09318305104970932,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09318305104970932,
"signal/advantage_abs_mean": 0.11073940545320511,
"signal/advantage_pre_scale_abs_mean": 0.11073940545320511,
"signal/advantage_pre_scale_std": 0.17361874580383302,
"signal/advantage_std": 0.17361874580383302,
"signal/brier_reward/centered_abs_mean": 0.1742205113172531,
"signal/brier_reward/group_bin_occupancy": 0.8534722222222222,
"signal/brier_reward/group_std_mean": 0.22123693227767943,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0174220509827137,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0174220509827137,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03802314177155495,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8024305555555555,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06435777395963668,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038023141212761404,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038023141212761404,
"signal/format_reward/centered_abs_mean": 0.02753363735973835,
"signal/format_reward/group_bin_occupancy": 0.15208333333333335,
"signal/format_reward/group_std_mean": 0.052114753425121306,
"signal/format_reward/group_zero_std_frac": 0.7833333492279053,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013766818679869175,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013766818679869175,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001538053946569562,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6909722222222222,
"signal/frontier_aurc_reward/group_std_mean": 0.0024671837454661727,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9225675350753592e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9225675350753592e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.21704848110675812,
"signal/frontier_coverage_0/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_0/group_std_mean": 0.28512428402900697,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_1/centered_abs_mean": 0.21704848110675812,
"signal/frontier_coverage_1/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_1/group_std_mean": 0.28512428402900697,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_10/centered_abs_mean": 0.21704848110675812,
"signal/frontier_coverage_10/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_10/group_std_mean": 0.28512428402900697,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_15/centered_abs_mean": 0.21704848110675812,
"signal/frontier_coverage_15/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_15/group_std_mean": 0.28512428402900697,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_20/centered_abs_mean": 0.21704848110675812,
"signal/frontier_coverage_20/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_20/group_std_mean": 0.28512428402900697,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_25/centered_abs_mean": 0.21704848110675812,
"signal/frontier_coverage_25/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_25/group_std_mean": 0.28512428402900697,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_5/centered_abs_mean": 0.21704848110675812,
"signal/frontier_coverage_5/group_bin_occupancy": 0.846875,
"signal/frontier_coverage_5/group_std_mean": 0.28512428402900697,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002713105920702219,
"signal/frontier_ece_reward/centered_abs_mean": 0.042044655233621595,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8402777777777777,
"signal/frontier_ece_reward/group_std_mean": 0.05982731878757477,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004204465728253126,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004204465728253126,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3326686263084412,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7815972222222222,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40350683927536013,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033266863971948626,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033266863971948626,
"step": 85
},
{
"calibration/aurc": 0.16738103314210595,
"calibration/batch_distribution_entropy": 0.9697290198955569,
"calibration/batch_entropy_100bins": 0.957681854552531,
"calibration/batch_entropy_10bins": 0.9697290198955569,
"calibration/batch_entropy_50bins": 0.9673187504894678,
"calibration/batch_uniqueness": 0.9503381676350635,
"calibration/buffer_distribution_entropy": 0.8989638723450991,
"calibration/buffer_entropy_100bins": 0.8812589203185311,
"calibration/buffer_entropy_10bins": 0.8989638723450991,
"calibration/buffer_entropy_50bins": 0.9050185855286758,
"calibration/confidence_entropy": 0.5001923409519085,
"calibration/coverage@0%": 0.012599211874791133,
"calibration/coverage@1%": 0.012599211874791133,
"calibration/coverage@10%": 0.3671920254539378,
"calibration/coverage@15%": 0.5097086071813152,
"calibration/coverage@20%": 0.6111030851992127,
"calibration/coverage@25%": 0.8049149330696966,
"calibration/coverage@30%": 0.9180851063829788,
"calibration/coverage@5%": 0.08726653299171958,
"calibration/ece": 0.16554122268343638,
"calibration/mean_confidence": 0.577438955676482,
"calibration/prompt_uniqueness": 0.871450176841621,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018402777777777813,
"completions/max_length": 3625.4,
"completions/max_terminated_length": 3625.4,
"completions/mean_length": 732.7577270507812,
"completions/mean_terminated_length": 746.5180297851563,
"completions/min_length": 0.0,
"completions/min_terminated_length": 248.0,
"epoch": 0.2159973000337496,
"grad_norm": 0.0003915593842975795,
"learning_rate": 3.5542168674698798e-06,
"loss": -0.0155,
"num_tokens": 190793516.0,
"reward": 0.9725017189979553,
"reward_std": 0.150723797082901,
"rewards/accuracy_reward": 0.6779513835906983,
"rewards/brier_reward": 0.7679128766059875,
"rewards/confidence_uniqueness_reward": 0.9321897506713868,
"rewards/format_reward": 0.9814236164093018,
"rewards/frontier_aurc_reward": -0.0015417236601933837,
"rewards/frontier_coverage_0": -0.012057388108223677,
"rewards/frontier_coverage_1": -0.012057388108223677,
"rewards/frontier_coverage_10": -0.012057388108223677,
"rewards/frontier_coverage_15": -0.012057388108223677,
"rewards/frontier_coverage_20": -0.012057388108223677,
"rewards/frontier_coverage_25": -0.012057388108223677,
"rewards/frontier_coverage_5": -0.012057388108223677,
"rewards/frontier_ece_reward": 0.009636924415826798,
"rewards/frontier_entropy_batch_reward": -0.270854526758194,
"signal/accuracy_reward/centered_abs_mean": 0.17960069477558135,
"signal/accuracy_reward/group_bin_occupancy": 0.20625,
"signal/accuracy_reward/group_std_mean": 0.2330833613872528,
"signal/accuracy_reward/group_zero_std_frac": 0.35,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08980034738779068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08980034738779068,
"signal/advantage_abs_mean": 0.11121289134025573,
"signal/advantage_pre_scale_abs_mean": 0.11121289134025573,
"signal/advantage_pre_scale_std": 0.1761443316936493,
"signal/advantage_std": 0.1761443316936493,
"signal/brier_reward/centered_abs_mean": 0.17101071774959564,
"signal/brier_reward/group_bin_occupancy": 0.851388888888889,
"signal/brier_reward/group_std_mean": 0.21667629480361938,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017101072520017625,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017101072520017625,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04075642824172974,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7923611111111112,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0677463486790657,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0040756430942565204,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0040756430942565204,
"signal/format_reward/centered_abs_mean": 0.0303819440305233,
"signal/format_reward/group_bin_occupancy": 0.15277777777777776,
"signal/format_reward/group_std_mean": 0.055435144901275636,
"signal/format_reward/group_zero_std_frac": 0.7777777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01519097201526165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01519097201526165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016192136332392692,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.678125,
"signal/frontier_aurc_reward/group_std_mean": 0.002686009602621198,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.02401693968568e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.02401693968568e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2019166976213455,
"signal/frontier_coverage_0/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_0/group_std_mean": 0.26531084775924685,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_1/centered_abs_mean": 0.2019166976213455,
"signal/frontier_coverage_1/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_1/group_std_mean": 0.26531084775924685,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_10/centered_abs_mean": 0.2019166976213455,
"signal/frontier_coverage_10/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_10/group_std_mean": 0.26531084775924685,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_15/centered_abs_mean": 0.2019166976213455,
"signal/frontier_coverage_15/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_15/group_std_mean": 0.26531084775924685,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_20/centered_abs_mean": 0.2019166976213455,
"signal/frontier_coverage_20/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_20/group_std_mean": 0.26531084775924685,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_25/centered_abs_mean": 0.2019166976213455,
"signal/frontier_coverage_25/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_25/group_std_mean": 0.26531084775924685,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_5/centered_abs_mean": 0.2019166976213455,
"signal/frontier_coverage_5/group_bin_occupancy": 0.845486111111111,
"signal/frontier_coverage_5/group_std_mean": 0.26531084775924685,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025239587295800446,
"signal/frontier_ece_reward/centered_abs_mean": 0.03998432978987694,
"signal/frontier_ece_reward/group_bin_occupancy": 0.825,
"signal/frontier_ece_reward/group_std_mean": 0.056585590541362765,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0039984329603612425,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0039984329603612425,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32754635214805605,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7697916666666667,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3992260992527008,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03275463432073593,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03275463432073593,
"step": 90
},
{
"calibration/aurc": 0.2147605719417338,
"calibration/batch_distribution_entropy": 0.967722082431089,
"calibration/batch_entropy_100bins": 0.9534754754805925,
"calibration/batch_entropy_10bins": 0.967722082431089,
"calibration/batch_entropy_50bins": 0.9641603425298937,
"calibration/batch_uniqueness": 0.9504319540791502,
"calibration/buffer_distribution_entropy": 0.906951265687233,
"calibration/buffer_entropy_100bins": 0.8916355598204531,
"calibration/buffer_entropy_10bins": 0.906951265687233,
"calibration/buffer_entropy_50bins": 0.9133523836950388,
"calibration/confidence_entropy": 0.5045424732246998,
"calibration/coverage@0%": 0.02540387226948959,
"calibration/coverage@1%": 0.02540387226948959,
"calibration/coverage@10%": 0.16098412853940106,
"calibration/coverage@15%": 0.5059969701039667,
"calibration/coverage@20%": 0.5992519983512614,
"calibration/coverage@25%": 0.6664830665649518,
"calibration/coverage@30%": 0.7247192628504611,
"calibration/coverage@5%": 0.03228217914779647,
"calibration/ece": 0.16749806640188375,
"calibration/mean_confidence": 0.5591985724521795,
"calibration/prompt_uniqueness": 0.8673264458396595,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0125,
"completions/max_length": 2774.4,
"completions/max_terminated_length": 2774.4,
"completions/mean_length": 741.1726684570312,
"completions/mean_terminated_length": 750.5460327148437,
"completions/min_length": 0.0,
"completions/min_terminated_length": 251.0,
"epoch": 0.22799715003562457,
"grad_norm": 0.0003392553189769387,
"learning_rate": 3.4036144578313257e-06,
"loss": -0.0104,
"num_tokens": 202423505.0,
"reward": 0.9711825489997864,
"reward_std": 0.1370842456817627,
"rewards/accuracy_reward": 0.6624131798744202,
"rewards/brier_reward": 0.7755590438842773,
"rewards/confidence_uniqueness_reward": 0.938760507106781,
"rewards/format_reward": 0.9873263955116272,
"rewards/frontier_aurc_reward": -0.0015440285205841064,
"rewards/frontier_coverage_0": 0.004539217054843903,
"rewards/frontier_coverage_1": 0.004539217054843903,
"rewards/frontier_coverage_10": 0.004539217054843903,
"rewards/frontier_coverage_15": 0.004539217054843903,
"rewards/frontier_coverage_20": 0.004539217054843903,
"rewards/frontier_coverage_25": 0.004539217054843903,
"rewards/frontier_coverage_5": 0.004539217054843903,
"rewards/frontier_ece_reward": 0.009655746817588805,
"rewards/frontier_entropy_batch_reward": -0.26462686955928805,
"signal/accuracy_reward/centered_abs_mean": 0.15812174379825591,
"signal/accuracy_reward/group_bin_occupancy": 0.20381944444444441,
"signal/accuracy_reward/group_std_mean": 0.21500875651836396,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07906087189912796,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07906087189912796,
"signal/advantage_abs_mean": 0.09966149926185608,
"signal/advantage_pre_scale_abs_mean": 0.09966149926185608,
"signal/advantage_pre_scale_std": 0.15955499708652496,
"signal/advantage_std": 0.15955499708652496,
"signal/brier_reward/centered_abs_mean": 0.16119154393672944,
"signal/brier_reward/group_bin_occupancy": 0.8548611111111111,
"signal/brier_reward/group_std_mean": 0.20481694340705872,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016119154170155525,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016119154170155525,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03265211023390293,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8354166666666668,
"signal/confidence_uniqueness_reward/group_std_mean": 0.054433510452508924,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003265211218968034,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003265211218968034,
"signal/format_reward/centered_abs_mean": 0.02172309048473835,
"signal/format_reward/group_bin_occupancy": 0.14618055555555554,
"signal/format_reward/group_std_mean": 0.041437828540802,
"signal/format_reward/group_zero_std_frac": 0.8305555701255798,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010861545242369175,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010861545242369175,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014647976960986853,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6805555555555556,
"signal/frontier_aurc_reward/group_std_mean": 0.0023766457568854095,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.83099717105506e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.83099717105506e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19489201307296752,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8607638888888889,
"signal/frontier_coverage_0/group_std_mean": 0.25580963492393494,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_1/centered_abs_mean": 0.19489201307296752,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8607638888888889,
"signal/frontier_coverage_1/group_std_mean": 0.25580963492393494,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_10/centered_abs_mean": 0.19489201307296752,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8607638888888889,
"signal/frontier_coverage_10/group_std_mean": 0.25580963492393494,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_15/centered_abs_mean": 0.19489201307296752,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8607638888888889,
"signal/frontier_coverage_15/group_std_mean": 0.25580963492393494,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_20/centered_abs_mean": 0.19489201307296752,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8607638888888889,
"signal/frontier_coverage_20/group_std_mean": 0.25580963492393494,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_25/centered_abs_mean": 0.19489201307296752,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8607638888888889,
"signal/frontier_coverage_25/group_std_mean": 0.25580963492393494,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_5/centered_abs_mean": 0.19489201307296752,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8607638888888889,
"signal/frontier_coverage_5/group_std_mean": 0.25580963492393494,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024361501913517714,
"signal/frontier_ece_reward/centered_abs_mean": 0.03678325191140175,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8333333333333333,
"signal/frontier_ece_reward/group_std_mean": 0.05242298766970634,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036783250980079174,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036783250980079174,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3345192611217499,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7840277777777777,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40661893486976625,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03345192670822143,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03345192670822143,
"step": 95
},
{
"calibration/aurc": 0.1708679078638365,
"calibration/batch_distribution_entropy": 0.9795370467281626,
"calibration/batch_entropy_100bins": 0.9590961086475867,
"calibration/batch_entropy_10bins": 0.9795370467281626,
"calibration/batch_entropy_50bins": 0.9723127936886226,
"calibration/batch_uniqueness": 0.951951389118833,
"calibration/buffer_distribution_entropy": 0.9131616196726149,
"calibration/buffer_entropy_100bins": 0.9000533082549502,
"calibration/buffer_entropy_10bins": 0.9131616196726149,
"calibration/buffer_entropy_50bins": 0.9200639230855394,
"calibration/confidence_entropy": 0.5023486182458435,
"calibration/coverage@0%": 0.006895599473400167,
"calibration/coverage@1%": 0.006895599473400167,
"calibration/coverage@10%": 0.22638525328449907,
"calibration/coverage@15%": 0.5124370909762469,
"calibration/coverage@20%": 0.7455989620373901,
"calibration/coverage@25%": 0.8765314713019363,
"calibration/coverage@30%": 0.9390113971602606,
"calibration/coverage@5%": 0.01972982407233065,
"calibration/ece": 0.16478280317365548,
"calibration/mean_confidence": 0.5564289649632773,
"calibration/prompt_uniqueness": 0.8679693861343288,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.016579861111111115,
"completions/max_length": 3928.4,
"completions/max_terminated_length": 3928.4,
"completions/mean_length": 758.4973876953125,
"completions/mean_terminated_length": 771.2429443359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 239.0,
"epoch": 0.23999700003749952,
"grad_norm": 0.00033934120438061655,
"learning_rate": 3.2530120481927713e-06,
"loss": -0.0122,
"num_tokens": 214260467.0,
"reward": 0.9764691114425659,
"reward_std": 0.14216096699237823,
"rewards/accuracy_reward": 0.6714409708976745,
"rewards/brier_reward": 0.7798413872718811,
"rewards/confidence_uniqueness_reward": 0.9353215217590332,
"rewards/format_reward": 0.9833333253860473,
"rewards/frontier_aurc_reward": -0.0014284017262980342,
"rewards/frontier_coverage_0": 0.012153985630720853,
"rewards/frontier_coverage_1": 0.012153985630720853,
"rewards/frontier_coverage_10": 0.012153985630720853,
"rewards/frontier_coverage_15": 0.012153985630720853,
"rewards/frontier_coverage_20": 0.012153985630720853,
"rewards/frontier_coverage_25": 0.012153985630720853,
"rewards/frontier_coverage_5": 0.012153985630720853,
"rewards/frontier_ece_reward": 0.012064610421657563,
"rewards/frontier_entropy_batch_reward": -0.24686425030231476,
"signal/accuracy_reward/centered_abs_mean": 0.17261826992034912,
"signal/accuracy_reward/group_bin_occupancy": 0.20659722222222224,
"signal/accuracy_reward/group_std_mean": 0.22805944979190826,
"signal/accuracy_reward/group_zero_std_frac": 0.34722222089767457,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08630913496017456,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08630913496017456,
"signal/advantage_abs_mean": 0.10423466861248017,
"signal/advantage_pre_scale_abs_mean": 0.10423466861248017,
"signal/advantage_pre_scale_std": 0.1661255478858948,
"signal/advantage_std": 0.1661255478858948,
"signal/brier_reward/centered_abs_mean": 0.16386253237724305,
"signal/brier_reward/group_bin_occupancy": 0.853125,
"signal/brier_reward/group_std_mean": 0.20879840552806855,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016386253573000432,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016386253573000432,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03649565950036049,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8076388888888889,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06122687980532646,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00364956590346992,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00364956590346992,
"signal/format_reward/centered_abs_mean": 0.02623697929084301,
"signal/format_reward/group_bin_occupancy": 0.15,
"signal/format_reward/group_std_mean": 0.049075322598218916,
"signal/format_reward/group_zero_std_frac": 0.800000011920929,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013118489645421505,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013118489645421505,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015159687958657742,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6760416666666667,
"signal/frontier_aurc_reward/group_std_mean": 0.002456930186599493,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8949608784168958e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8949608784168958e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20491735637187958,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8486111111111111,
"signal/frontier_coverage_0/group_std_mean": 0.2684360921382904,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_1/centered_abs_mean": 0.20491735637187958,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8486111111111111,
"signal/frontier_coverage_1/group_std_mean": 0.2684360921382904,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_10/centered_abs_mean": 0.20491735637187958,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8486111111111111,
"signal/frontier_coverage_10/group_std_mean": 0.2684360921382904,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_15/centered_abs_mean": 0.20491735637187958,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8486111111111111,
"signal/frontier_coverage_15/group_std_mean": 0.2684360921382904,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_20/centered_abs_mean": 0.20491735637187958,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8486111111111111,
"signal/frontier_coverage_20/group_std_mean": 0.2684360921382904,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_25/centered_abs_mean": 0.20491735637187958,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8486111111111111,
"signal/frontier_coverage_25/group_std_mean": 0.2684360921382904,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_5/centered_abs_mean": 0.20491735637187958,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8486111111111111,
"signal/frontier_coverage_5/group_std_mean": 0.2684360921382904,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002561466954648495,
"signal/frontier_ece_reward/centered_abs_mean": 0.037827713042497636,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8420138888888887,
"signal/frontier_ece_reward/group_std_mean": 0.05301511362195015,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003782771248370409,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003782771248370409,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3126159429550171,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7684027777777778,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3858396053314209,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031261596083641055,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031261596083641055,
"step": 100
},
{
"epoch": 0.23999700003749952,
"eval_calibration/aurc": 0.16370137027749238,
"eval_calibration/batch_distribution_entropy": 0.9336636571209378,
"eval_calibration/batch_entropy_100bins": 0.7040574498413844,
"eval_calibration/batch_entropy_10bins": 0.9336636571209378,
"eval_calibration/batch_entropy_50bins": 0.7745602196729959,
"eval_calibration/batch_uniqueness": 0.8935712853364551,
"eval_calibration/buffer_distribution_entropy": 0.9176718551760042,
"eval_calibration/buffer_entropy_100bins": 0.905262358950738,
"eval_calibration/buffer_entropy_10bins": 0.9176718551760042,
"eval_calibration/buffer_entropy_50bins": 0.9244583301099669,
"eval_calibration/confidence_entropy": 0.4785545865961936,
"eval_calibration/coverage@0%": 0.2054771505376344,
"eval_calibration/coverage@1%": 0.2054771505376344,
"eval_calibration/coverage@10%": 0.3649193548387097,
"eval_calibration/coverage@15%": 0.5431787634408602,
"eval_calibration/coverage@20%": 0.710853494623656,
"eval_calibration/coverage@25%": 0.8108198924731184,
"eval_calibration/coverage@30%": 0.9260752688172044,
"eval_calibration/coverage@5%": 0.25924059139784944,
"eval_calibration/ece": 0.19514030398666016,
"eval_calibration/mean_confidence": 0.5611983746023261,
"eval_calibration/prompt_uniqueness": 0.8935712853364551,
"eval_completions/clipped_ratio": 0.013020833333333334,
"eval_completions/max_length": 2318.1666666666665,
"eval_completions/max_terminated_length": 2318.1666666666665,
"eval_completions/mean_length": 748.2187703450521,
"eval_completions/mean_terminated_length": 758.1032104492188,
"eval_completions/min_length": 123.0,
"eval_completions/min_terminated_length": 307.6666666666667,
"eval_loss": 0.0,
"eval_num_tokens": 214260467.0,
"eval_reward": 0.8898186484972636,
"eval_reward_std": 0.2399557630221049,
"eval_rewards/accuracy_reward": 0.6519097288449606,
"eval_rewards/brier_reward": 0.7833640774091085,
"eval_rewards/confidence_uniqueness_reward": 0.8799956142902374,
"eval_rewards/format_reward": 0.9852430522441864,
"eval_rewards/frontier_aurc_reward": -0.0014197090058587492,
"eval_rewards/frontier_coverage_0": 0.02340823287765185,
"eval_rewards/frontier_coverage_1": 0.02340823287765185,
"eval_rewards/frontier_coverage_10": 0.02340823287765185,
"eval_rewards/frontier_coverage_15": 0.02340823287765185,
"eval_rewards/frontier_coverage_20": 0.02340823287765185,
"eval_rewards/frontier_coverage_25": 0.02340823287765185,
"eval_rewards/frontier_coverage_5": 0.02340823287765185,
"eval_rewards/frontier_ece_reward": 0.014000983831162253,
"eval_rewards/frontier_entropy_batch_reward": -0.9852430522441864,
"eval_runtime": 193.0887,
"eval_samples_per_second": 5.179,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4361436615387599,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4730866402387619,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21807183076937994,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21807183076937994,
"eval_signal/advantage_abs_mean": 0.2088108335932096,
"eval_signal/advantage_pre_scale_abs_mean": 0.2088108335932096,
"eval_signal/advantage_pre_scale_std": 0.23898746818304062,
"eval_signal/advantage_std": 0.23898746818304062,
"eval_signal/brier_reward/centered_abs_mean": 0.2062627375125885,
"eval_signal/brier_reward/group_bin_occupancy": 0.8611111111111112,
"eval_signal/brier_reward/group_std_mean": 0.2609405269225438,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020626275179286797,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020626275179286797,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05856152934332689,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40277777777777773,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09981899770597617,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005856153244773547,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005856153244773547,
"eval_signal/format_reward/centered_abs_mean": 0.028049044621487457,
"eval_signal/format_reward/group_bin_occupancy": 0.17013888888888887,
"eval_signal/format_reward/group_std_mean": 0.07099391147494316,
"eval_signal/format_reward/group_zero_std_frac": 0.638888900478681,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.014024522310743729,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.014024522310743729,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020108624982337155,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.670138888888889,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0036396855721250176,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5135781167288467e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5135781167288467e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2666911060611407,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8993055555555555,
"eval_signal/frontier_coverage_0/group_std_mean": 0.37884485224882763,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2666911060611407,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8993055555555555,
"eval_signal/frontier_coverage_1/group_std_mean": 0.37884485224882763,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2666911060611407,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8993055555555555,
"eval_signal/frontier_coverage_10/group_std_mean": 0.37884485224882763,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2666911060611407,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8993055555555555,
"eval_signal/frontier_coverage_15/group_std_mean": 0.37884485224882763,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2666911060611407,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8993055555555555,
"eval_signal/frontier_coverage_20/group_std_mean": 0.37884485224882763,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2666911060611407,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8993055555555555,
"eval_signal/frontier_coverage_25/group_std_mean": 0.37884485224882763,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2666911060611407,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8993055555555555,
"eval_signal/frontier_coverage_5/group_std_mean": 0.37884485224882763,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003333638849047323,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.041228462010622025,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.05814546967546145,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041228463329995675,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041228463329995675,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.028049044621487457,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.17013888888888887,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07099391147494316,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.638888900478681,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0028049046328912177,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0028049046328912177,
"eval_steps_per_second": 0.031,
"step": 100
},
{
"calibration/aurc": 0.29434606104219985,
"calibration/batch_distribution_entropy": 0.9775606413732308,
"calibration/batch_entropy_100bins": 0.9610723412250273,
"calibration/batch_entropy_10bins": 0.9775606413732308,
"calibration/batch_entropy_50bins": 0.9716482303553293,
"calibration/batch_uniqueness": 0.9517786214609512,
"calibration/buffer_distribution_entropy": 0.9202256372529669,
"calibration/buffer_entropy_100bins": 0.9085458764385524,
"calibration/buffer_entropy_10bins": 0.9202256372529669,
"calibration/buffer_entropy_50bins": 0.9270568801851861,
"calibration/confidence_entropy": 0.4876244774879632,
"calibration/coverage@0%": 0.07971241988213293,
"calibration/coverage@1%": 0.08024433477574996,
"calibration/coverage@10%": 0.15648546952752299,
"calibration/coverage@15%": 0.21573857236440247,
"calibration/coverage@20%": 0.2910429458868611,
"calibration/coverage@25%": 0.3874947635370921,
"calibration/coverage@30%": 0.48382004564167536,
"calibration/coverage@5%": 0.12439327094596273,
"calibration/ece": 0.17036784676882605,
"calibration/mean_confidence": 0.5628817806849853,
"calibration/prompt_uniqueness": 0.8609082618643228,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017708333333333326,
"completions/max_length": 3732.2,
"completions/max_terminated_length": 3732.2,
"completions/mean_length": 744.0646850585938,
"completions/mean_terminated_length": 757.5498779296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 238.8,
"epoch": 0.2519968500393745,
"grad_norm": 0.0006606943206861615,
"learning_rate": 3.1024096385542172e-06,
"loss": -0.0156,
"num_tokens": 225908956.0,
"reward": 0.9728375434875488,
"reward_std": 0.13976509720087052,
"rewards/accuracy_reward": 0.6686632037162781,
"rewards/brier_reward": 0.7778706073760986,
"rewards/confidence_uniqueness_reward": 0.9337520241737366,
"rewards/format_reward": 0.9821180582046509,
"rewards/frontier_aurc_reward": -0.0013738935464061796,
"rewards/frontier_coverage_0": 0.008083556871861219,
"rewards/frontier_coverage_1": 0.008083556871861219,
"rewards/frontier_coverage_10": 0.008083556871861219,
"rewards/frontier_coverage_15": 0.008083556871861219,
"rewards/frontier_coverage_20": 0.008083556871861219,
"rewards/frontier_coverage_25": 0.008083556871861219,
"rewards/frontier_coverage_5": 0.008083556871861219,
"rewards/frontier_ece_reward": 0.011097485572099686,
"rewards/frontier_entropy_batch_reward": -0.25515236556529997,
"signal/accuracy_reward/centered_abs_mean": 0.16565212607383728,
"signal/accuracy_reward/group_bin_occupancy": 0.19930555555555557,
"signal/accuracy_reward/group_std_mean": 0.21403219997882844,
"signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08282606303691864,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08282606303691864,
"signal/advantage_abs_mean": 0.1027535393834114,
"signal/advantage_pre_scale_abs_mean": 0.1027535393834114,
"signal/advantage_pre_scale_std": 0.16651992797851561,
"signal/advantage_std": 0.16651992797851561,
"signal/brier_reward/centered_abs_mean": 0.1597402274608612,
"signal/brier_reward/group_bin_occupancy": 0.845486111111111,
"signal/brier_reward/group_std_mean": 0.20388856828212737,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01597402263432741,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01597402263432741,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039262811094522475,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7920138888888889,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06549572870135308,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003926281165331602,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003926281165331602,
"signal/format_reward/centered_abs_mean": 0.02922091968357563,
"signal/format_reward/group_bin_occupancy": 0.15173611111111113,
"signal/format_reward/group_std_mean": 0.05369042381644249,
"signal/format_reward/group_zero_std_frac": 0.7861111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014610459841787816,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.014610459841787816,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014508004300296307,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6725694444444443,
"signal/frontier_aurc_reward/group_std_mean": 0.0023758172057569025,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8135006212105508e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8135006212105508e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2051199734210968,
"signal/frontier_coverage_0/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_0/group_std_mean": 0.2649015933275223,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_1/centered_abs_mean": 0.2051199734210968,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_1/group_std_mean": 0.2649015933275223,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_10/centered_abs_mean": 0.2051199734210968,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_10/group_std_mean": 0.2649015933275223,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_15/centered_abs_mean": 0.2051199734210968,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_15/group_std_mean": 0.2649015933275223,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_20/centered_abs_mean": 0.2051199734210968,
"signal/frontier_coverage_20/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_20/group_std_mean": 0.2649015933275223,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_25/centered_abs_mean": 0.2051199734210968,
"signal/frontier_coverage_25/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_25/group_std_mean": 0.2649015933275223,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_5/centered_abs_mean": 0.2051199734210968,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_5/group_std_mean": 0.2649015933275223,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025639997329562902,
"signal/frontier_ece_reward/centered_abs_mean": 0.03703099712729454,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8190972222222224,
"signal/frontier_ece_reward/group_std_mean": 0.051789505034685136,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037030997220426796,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037030997220426796,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31766087412834165,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.757638888888889,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3910989761352539,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03176608793437481,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03176608793437481,
"step": 105
},
{
"calibration/aurc": 0.19799323292635163,
"calibration/batch_distribution_entropy": 0.9537625669630876,
"calibration/batch_entropy_100bins": 0.9453870980796439,
"calibration/batch_entropy_10bins": 0.9537625669630876,
"calibration/batch_entropy_50bins": 0.9547702004879739,
"calibration/batch_uniqueness": 0.9466343035079527,
"calibration/buffer_distribution_entropy": 0.9240580653843992,
"calibration/buffer_entropy_100bins": 0.9147163586526406,
"calibration/buffer_entropy_10bins": 0.9240580653843992,
"calibration/buffer_entropy_50bins": 0.9315463111926698,
"calibration/confidence_entropy": 0.4835072146320634,
"calibration/coverage@0%": 0.035680901908089514,
"calibration/coverage@1%": 0.035680901908089514,
"calibration/coverage@10%": 0.24178913328134474,
"calibration/coverage@15%": 0.31315851191612476,
"calibration/coverage@20%": 0.4733090167170541,
"calibration/coverage@25%": 0.7230019075321643,
"calibration/coverage@30%": 0.8897233502388046,
"calibration/coverage@5%": 0.08753672359530008,
"calibration/ece": 0.14042280691981804,
"calibration/mean_confidence": 0.6066246764748546,
"calibration/prompt_uniqueness": 0.8641427667407416,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.016232638888888908,
"completions/max_length": 3394.8,
"completions/max_terminated_length": 3394.8,
"completions/mean_length": 746.4253662109375,
"completions/mean_terminated_length": 758.7831420898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 249.6,
"epoch": 0.2639967000412495,
"grad_norm": 0.0003848325868602842,
"learning_rate": 2.9518072289156627e-06,
"loss": -0.0136,
"num_tokens": 237616224.0,
"reward": 0.9861885070800781,
"reward_std": 0.13821190893650054,
"rewards/accuracy_reward": 0.7033854126930237,
"rewards/brier_reward": 0.7831946849822998,
"rewards/confidence_uniqueness_reward": 0.9336396098136902,
"rewards/format_reward": 0.9835069417953491,
"rewards/frontier_aurc_reward": -0.0013050575507804751,
"rewards/frontier_coverage_0": -0.011092400312190876,
"rewards/frontier_coverage_1": -0.011092400312190876,
"rewards/frontier_coverage_10": -0.011092400312190876,
"rewards/frontier_coverage_15": -0.011092400312190876,
"rewards/frontier_coverage_20": -0.011092400312190876,
"rewards/frontier_coverage_25": -0.011092400312190876,
"rewards/frontier_coverage_5": -0.011092400312190876,
"rewards/frontier_ece_reward": 0.008409860450774432,
"rewards/frontier_entropy_batch_reward": -0.2879524528980255,
"signal/accuracy_reward/centered_abs_mean": 0.15787217915058135,
"signal/accuracy_reward/group_bin_occupancy": 0.20694444444444446,
"signal/accuracy_reward/group_std_mean": 0.21840295791625977,
"signal/accuracy_reward/group_zero_std_frac": 0.3444444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07893608957529068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07893608957529068,
"signal/advantage_abs_mean": 0.09958325326442719,
"signal/advantage_pre_scale_abs_mean": 0.09958325326442719,
"signal/advantage_pre_scale_std": 0.16226766407489776,
"signal/advantage_std": 0.16226766407489776,
"signal/brier_reward/centered_abs_mean": 0.15236919820308686,
"signal/brier_reward/group_bin_occupancy": 0.8368055555555556,
"signal/brier_reward/group_std_mean": 0.19616940319538118,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01523692011833191,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01523692011833191,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03703809753060341,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8079861111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06205111965537071,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037038099486380815,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037038099486380815,
"signal/format_reward/centered_abs_mean": 0.02634548582136631,
"signal/format_reward/group_bin_occupancy": 0.15104166666666669,
"signal/format_reward/group_std_mean": 0.049515650421380994,
"signal/format_reward/group_zero_std_frac": 0.7916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013172742910683155,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013172742910683155,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014365239767357707,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6795138888888889,
"signal/frontier_aurc_reward/group_std_mean": 0.0023579075932502747,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7956549891096073e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7956549891096073e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1875341057777405,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_0/group_std_mean": 0.24931617081165314,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_1/centered_abs_mean": 0.1875341057777405,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_1/group_std_mean": 0.24931617081165314,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_10/centered_abs_mean": 0.1875341057777405,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_10/group_std_mean": 0.24931617081165314,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_15/centered_abs_mean": 0.1875341057777405,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_15/group_std_mean": 0.24931617081165314,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_20/centered_abs_mean": 0.1875341057777405,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_20/group_std_mean": 0.24931617081165314,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_25/centered_abs_mean": 0.1875341057777405,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_25/group_std_mean": 0.24931617081165314,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_5/centered_abs_mean": 0.1875341057777405,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_5/group_std_mean": 0.24931617081165314,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002344176359474659,
"signal/frontier_ece_reward/centered_abs_mean": 0.034382133185863493,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8138888888888889,
"signal/frontier_ece_reward/group_std_mean": 0.049156392365694045,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0034382133278995754,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0034382133278995754,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33333885073661806,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7628472222222222,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4037556827068329,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03333388455212116,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333388455212116,
"step": 110
},
{
"calibration/aurc": 0.287285473949784,
"calibration/batch_distribution_entropy": 0.9682973508384333,
"calibration/batch_entropy_100bins": 0.9555438616131868,
"calibration/batch_entropy_10bins": 0.9682973508384333,
"calibration/batch_entropy_50bins": 0.9667426257666565,
"calibration/batch_uniqueness": 0.9504674113351846,
"calibration/buffer_distribution_entropy": 0.9277634548376474,
"calibration/buffer_entropy_100bins": 0.9200476103891649,
"calibration/buffer_entropy_10bins": 0.9277634548376474,
"calibration/buffer_entropy_50bins": 0.9356148829884694,
"calibration/confidence_entropy": 0.5307235518352804,
"calibration/coverage@0%": 0.004751471720715325,
"calibration/coverage@1%": 0.004751471720715325,
"calibration/coverage@10%": 0.052850640633563484,
"calibration/coverage@15%": 0.2552780215859444,
"calibration/coverage@20%": 0.41941420756977194,
"calibration/coverage@25%": 0.5522348050540486,
"calibration/coverage@30%": 0.6566724394626509,
"calibration/coverage@5%": 0.004751471720715325,
"calibration/ece": 0.1892431956188166,
"calibration/mean_confidence": 0.5415657741341013,
"calibration/prompt_uniqueness": 0.8632119764295343,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.016232638888888908,
"completions/max_length": 3814.8,
"completions/max_terminated_length": 3814.8,
"completions/mean_length": 740.032568359375,
"completions/mean_terminated_length": 752.2534545898437,
"completions/min_length": 0.0,
"completions/min_terminated_length": 238.6,
"epoch": 0.27599655004312446,
"grad_norm": 0.00034388055792078376,
"learning_rate": 2.8012048192771087e-06,
"loss": -0.0136,
"num_tokens": 249220599.0,
"reward": 0.9712253212928772,
"reward_std": 0.13757235705852508,
"rewards/accuracy_reward": 0.6709201455116272,
"rewards/brier_reward": 0.7752394318580628,
"rewards/confidence_uniqueness_reward": 0.933931851387024,
"rewards/format_reward": 0.98359375,
"rewards/frontier_aurc_reward": -0.0014276995789259672,
"rewards/frontier_coverage_0": 0.0024690252728760244,
"rewards/frontier_coverage_1": 0.0024690252728760244,
"rewards/frontier_coverage_10": 0.0024690252728760244,
"rewards/frontier_coverage_15": 0.0024690252728760244,
"rewards/frontier_coverage_20": 0.0024690252728760244,
"rewards/frontier_coverage_25": 0.0024690252728760244,
"rewards/frontier_coverage_5": 0.0024690252728760244,
"rewards/frontier_ece_reward": 0.00639819772914052,
"rewards/frontier_entropy_batch_reward": -0.2778678983449936,
"signal/accuracy_reward/centered_abs_mean": 0.1559516042470932,
"signal/accuracy_reward/group_bin_occupancy": 0.19861111111111113,
"signal/accuracy_reward/group_std_mean": 0.20581479370594025,
"signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0779758021235466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0779758021235466,
"signal/advantage_abs_mean": 0.09996644854545593,
"signal/advantage_pre_scale_abs_mean": 0.09996644854545593,
"signal/advantage_pre_scale_std": 0.16126873791217805,
"signal/advantage_std": 0.16126873791217805,
"signal/brier_reward/centered_abs_mean": 0.15016718208789825,
"signal/brier_reward/group_bin_occupancy": 0.8506944444444444,
"signal/brier_reward/group_std_mean": 0.19337638914585115,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015016718581318856,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015016718581318856,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03684631027281284,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7972222222222223,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06412606909871102,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003684631362557411,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003684631362557411,
"signal/format_reward/centered_abs_mean": 0.02667643241584301,
"signal/format_reward/group_bin_occupancy": 0.15243055555555557,
"signal/format_reward/group_std_mean": 0.05222913697361946,
"signal/format_reward/group_zero_std_frac": 0.7805555582046508,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013338216207921505,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.013338216207921505,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001323050889186561,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6850694444444445,
"signal/frontier_aurc_reward/group_std_mean": 0.0021448110230267046,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6538135969312862e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6538135969312862e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18924466371536255,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_0/group_std_mean": 0.24632638990879058,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_1/centered_abs_mean": 0.18924466371536255,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_1/group_std_mean": 0.24632638990879058,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_10/centered_abs_mean": 0.18924466371536255,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_10/group_std_mean": 0.24632638990879058,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_15/centered_abs_mean": 0.18924466371536255,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_15/group_std_mean": 0.24632638990879058,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_20/centered_abs_mean": 0.18924466371536255,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_20/group_std_mean": 0.24632638990879058,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_25/centered_abs_mean": 0.18924466371536255,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_25/group_std_mean": 0.24632638990879058,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_5/centered_abs_mean": 0.18924466371536255,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8597222222222223,
"signal/frontier_coverage_5/group_std_mean": 0.24632638990879058,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023655582685023547,
"signal/frontier_ece_reward/centered_abs_mean": 0.03106148950755596,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8159722222222221,
"signal/frontier_ece_reward/group_std_mean": 0.043675854057073596,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031061490997672083,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031061490997672083,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3323960185050964,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.759375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4029895007610321,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03323960341513157,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03323960341513157,
"step": 115
},
{
"calibration/aurc": 0.2857290173805331,
"calibration/batch_distribution_entropy": 0.9630372236339821,
"calibration/batch_entropy_100bins": 0.9529791109323151,
"calibration/batch_entropy_10bins": 0.9630372236339821,
"calibration/batch_entropy_50bins": 0.9638178813465694,
"calibration/batch_uniqueness": 0.9493225460778467,
"calibration/buffer_distribution_entropy": 0.932573310927301,
"calibration/buffer_entropy_100bins": 0.9254646116102905,
"calibration/buffer_entropy_10bins": 0.932573310927301,
"calibration/buffer_entropy_50bins": 0.9401181111666196,
"calibration/confidence_entropy": 0.5012522680111101,
"calibration/coverage@0%": 0.010983857117787824,
"calibration/coverage@1%": 0.010983857117787824,
"calibration/coverage@10%": 0.16000526703945886,
"calibration/coverage@15%": 0.3665599237179272,
"calibration/coverage@20%": 0.5034456617136492,
"calibration/coverage@25%": 0.5335280661718852,
"calibration/coverage@30%": 0.5718207547816314,
"calibration/coverage@5%": 0.015683596021182085,
"calibration/ece": 0.19073780715158475,
"calibration/mean_confidence": 0.583309509564421,
"calibration/prompt_uniqueness": 0.8659640504197897,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011284722222222255,
"completions/max_length": 3415.4,
"completions/max_terminated_length": 3415.4,
"completions/mean_length": 733.89296875,
"completions/mean_terminated_length": 742.2210571289063,
"completions/min_length": 0.0,
"completions/min_terminated_length": 258.8,
"epoch": 0.28799640004499943,
"grad_norm": 0.0003453529498074204,
"learning_rate": 2.6506024096385547e-06,
"loss": -0.0095,
"num_tokens": 260756902.0,
"reward": 0.9813999176025391,
"reward_std": 0.137198106944561,
"rewards/accuracy_reward": 0.6784722208976746,
"rewards/brier_reward": 0.7936996936798095,
"rewards/confidence_uniqueness_reward": 0.938444995880127,
"rewards/format_reward": 0.9886284828186035,
"rewards/frontier_aurc_reward": -0.0014351831981912256,
"rewards/frontier_coverage_0": 0.012847778201103211,
"rewards/frontier_coverage_1": 0.012847778201103211,
"rewards/frontier_coverage_10": 0.012847778201103211,
"rewards/frontier_coverage_15": 0.012847778201103211,
"rewards/frontier_coverage_20": 0.012847778201103211,
"rewards/frontier_coverage_25": 0.012847778201103211,
"rewards/frontier_coverage_5": 0.012847778201103211,
"rewards/frontier_ece_reward": 0.009404824767261743,
"rewards/frontier_entropy_batch_reward": -0.2741163432598114,
"signal/accuracy_reward/centered_abs_mean": 0.1679144948720932,
"signal/accuracy_reward/group_bin_occupancy": 0.2034722222222222,
"signal/accuracy_reward/group_std_mean": 0.22080156803131104,
"signal/accuracy_reward/group_zero_std_frac": 0.37222222089767454,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0839572474360466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0839572474360466,
"signal/advantage_abs_mean": 0.10121523141860962,
"signal/advantage_pre_scale_abs_mean": 0.10121523141860962,
"signal/advantage_pre_scale_std": 0.16033710837364196,
"signal/advantage_std": 0.16033710837364196,
"signal/brier_reward/centered_abs_mean": 0.14215776324272156,
"signal/brier_reward/group_bin_occupancy": 0.8395833333333332,
"signal/brier_reward/group_std_mean": 0.18405098021030425,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014215776138007641,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014215776138007641,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031229404360055925,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.820486111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05529859885573387,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003122940473258495,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003122940473258495,
"signal/format_reward/centered_abs_mean": 0.020024956576526164,
"signal/format_reward/group_bin_occupancy": 0.14861111111111108,
"signal/format_reward/group_std_mean": 0.04193191379308701,
"signal/format_reward/group_zero_std_frac": 0.8111111283302307,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010012478288263082,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010012478288263082,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015335174975916743,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6753472222222221,
"signal/frontier_aurc_reward/group_std_mean": 0.0024902403354644777,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.916896871989593e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.916896871989593e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17784596085548401,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8430555555555556,
"signal/frontier_coverage_0/group_std_mean": 0.23407686352729798,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_1/centered_abs_mean": 0.17784596085548401,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8430555555555556,
"signal/frontier_coverage_1/group_std_mean": 0.23407686352729798,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_10/centered_abs_mean": 0.17784596085548401,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8430555555555556,
"signal/frontier_coverage_10/group_std_mean": 0.23407686352729798,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_15/centered_abs_mean": 0.17784596085548401,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8430555555555556,
"signal/frontier_coverage_15/group_std_mean": 0.23407686352729798,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_20/centered_abs_mean": 0.17784596085548401,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8430555555555556,
"signal/frontier_coverage_20/group_std_mean": 0.23407686352729798,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_25/centered_abs_mean": 0.17784596085548401,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8430555555555556,
"signal/frontier_coverage_25/group_std_mean": 0.23407686352729798,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_5/centered_abs_mean": 0.17784596085548401,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8430555555555556,
"signal/frontier_coverage_5/group_std_mean": 0.23407686352729798,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022230746224522592,
"signal/frontier_ece_reward/centered_abs_mean": 0.03231002166867256,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8104166666666668,
"signal/frontier_ece_reward/group_std_mean": 0.04614760801196098,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032310022041201593,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032310022041201593,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3172143340110779,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38740280270576477,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03172143436968326,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03172143436968326,
"step": 120
},
{
"calibration/aurc": 0.1575635410107588,
"calibration/batch_distribution_entropy": 0.9612510163398869,
"calibration/batch_entropy_100bins": 0.9552380496503574,
"calibration/batch_entropy_10bins": 0.9612510163398869,
"calibration/batch_entropy_50bins": 0.9641717634207829,
"calibration/batch_uniqueness": 0.949993854889337,
"calibration/buffer_distribution_entropy": 0.9353462564740422,
"calibration/buffer_entropy_100bins": 0.9296285255700507,
"calibration/buffer_entropy_10bins": 0.9353462564740422,
"calibration/buffer_entropy_50bins": 0.9432362064347739,
"calibration/confidence_entropy": 0.5031603083743401,
"calibration/coverage@0%": 0.13024148292317878,
"calibration/coverage@1%": 0.1729498162565121,
"calibration/coverage@10%": 0.3448486815291011,
"calibration/coverage@15%": 0.44466883739799756,
"calibration/coverage@20%": 0.6997469786467916,
"calibration/coverage@25%": 0.8670727387361822,
"calibration/coverage@30%": 0.9466780773070889,
"calibration/coverage@5%": 0.2884318818261089,
"calibration/ece": 0.17434127126435955,
"calibration/mean_confidence": 0.5888708948278307,
"calibration/prompt_uniqueness": 0.8624025232593183,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011024305555555558,
"completions/max_length": 3571.4,
"completions/max_terminated_length": 3571.4,
"completions/mean_length": 738.0600708007812,
"completions/mean_terminated_length": 746.339111328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 242.0,
"epoch": 0.2999962500468744,
"grad_norm": 0.00036861843545921147,
"learning_rate": 2.5e-06,
"loss": -0.0104,
"num_tokens": 272377018.0,
"reward": 0.9875968217849731,
"reward_std": 0.12884068638086318,
"rewards/accuracy_reward": 0.6880208253860474,
"rewards/brier_reward": 0.7970276832580566,
"rewards/confidence_uniqueness_reward": 0.9396448731422424,
"rewards/format_reward": 0.9886284708976746,
"rewards/frontier_aurc_reward": -0.0011375241447240114,
"rewards/frontier_coverage_0": 0.01211215639486909,
"rewards/frontier_coverage_1": 0.01211215639486909,
"rewards/frontier_coverage_10": 0.01211215639486909,
"rewards/frontier_coverage_15": 0.01211215639486909,
"rewards/frontier_coverage_20": 0.01211215639486909,
"rewards/frontier_coverage_25": 0.01211215639486909,
"rewards/frontier_coverage_5": 0.01211215639486909,
"rewards/frontier_ece_reward": 0.008880946971476077,
"rewards/frontier_entropy_batch_reward": -0.26328781247138977,
"signal/accuracy_reward/centered_abs_mean": 0.1634006083011627,
"signal/accuracy_reward/group_bin_occupancy": 0.1982638888888889,
"signal/accuracy_reward/group_std_mean": 0.21180324256420135,
"signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08170030415058135,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08170030415058135,
"signal/advantage_abs_mean": 0.09549782574176788,
"signal/advantage_pre_scale_abs_mean": 0.09549782574176788,
"signal/advantage_pre_scale_std": 0.15226317346096038,
"signal/advantage_std": 0.15226317346096038,
"signal/brier_reward/centered_abs_mean": 0.14393920302391053,
"signal/brier_reward/group_bin_occupancy": 0.8489583333333334,
"signal/brier_reward/group_std_mean": 0.1842961460351944,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014393920078873634,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014393920078873634,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0306204479187727,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8493055555555555,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05007597878575325,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030620446428656577,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030620446428656577,
"signal/format_reward/centered_abs_mean": 0.01981879323720932,
"signal/format_reward/group_bin_occupancy": 0.14375,
"signal/format_reward/group_std_mean": 0.037004124373197556,
"signal/format_reward/group_zero_std_frac": 0.8499999880790711,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00990939661860466,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00990939661860466,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001247124606743455,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6756944444444444,
"signal/frontier_aurc_reward/group_std_mean": 0.002091983216814697,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.558905732963467e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.558905732963467e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1979488104581833,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_0/group_std_mean": 0.25512219667434693,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_1/centered_abs_mean": 0.1979488104581833,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_1/group_std_mean": 0.25512219667434693,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_10/centered_abs_mean": 0.1979488104581833,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_10/group_std_mean": 0.25512219667434693,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_15/centered_abs_mean": 0.1979488104581833,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_15/group_std_mean": 0.25512219667434693,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_20/centered_abs_mean": 0.1979488104581833,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_20/group_std_mean": 0.25512219667434693,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_25/centered_abs_mean": 0.1979488104581833,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_25/group_std_mean": 0.25512219667434693,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_5/centered_abs_mean": 0.1979488104581833,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_coverage_5/group_std_mean": 0.25512219667434693,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024743602611124516,
"signal/frontier_ece_reward/centered_abs_mean": 0.032735417038202284,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8017361111111111,
"signal/frontier_ece_reward/group_std_mean": 0.0460764616727829,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032735418528318403,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032735418528318403,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31857306957244874,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7579861111111111,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39109026789665224,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0318573072552681,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0318573072552681,
"step": 125
},
{
"calibration/aurc": 0.20780328574617007,
"calibration/batch_distribution_entropy": 0.9582987992066305,
"calibration/batch_entropy_100bins": 0.9515668397932107,
"calibration/batch_entropy_10bins": 0.9582987992066305,
"calibration/batch_entropy_50bins": 0.9617035518413081,
"calibration/batch_uniqueness": 0.9484267124407983,
"calibration/buffer_distribution_entropy": 0.9379300572922322,
"calibration/buffer_entropy_100bins": 0.9333226128121981,
"calibration/buffer_entropy_10bins": 0.9379300572922322,
"calibration/buffer_entropy_50bins": 0.9460268584755,
"calibration/confidence_entropy": 0.48645760072603716,
"calibration/coverage@0%": 0.03253532995344596,
"calibration/coverage@1%": 0.03253532995344596,
"calibration/coverage@10%": 0.24292767469988902,
"calibration/coverage@15%": 0.3801627540686855,
"calibration/coverage@20%": 0.5748314186914957,
"calibration/coverage@25%": 0.7332900747954306,
"calibration/coverage@30%": 0.8432733894832689,
"calibration/coverage@5%": 0.061231472001795406,
"calibration/ece": 0.11196021170386979,
"calibration/mean_confidence": 0.571842256393492,
"calibration/prompt_uniqueness": 0.8573626933942176,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.017100694444444443,
"completions/max_length": 3418.2,
"completions/max_terminated_length": 3418.2,
"completions/mean_length": 769.5232666015625,
"completions/mean_terminated_length": 782.9178833007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 225.6,
"epoch": 0.3119961000487494,
"grad_norm": 0.00037062042974866927,
"learning_rate": 2.349397590361446e-06,
"loss": -0.0142,
"num_tokens": 284366726.0,
"reward": 0.9679849982261658,
"reward_std": 0.14072301387786865,
"rewards/accuracy_reward": 0.659374988079071,
"rewards/brier_reward": 0.7807927131652832,
"rewards/confidence_uniqueness_reward": 0.9330769419670105,
"rewards/format_reward": 0.9828993082046509,
"rewards/frontier_aurc_reward": -0.0013563590357080102,
"rewards/frontier_coverage_0": 0.017163947224617004,
"rewards/frontier_coverage_1": 0.017163947224617004,
"rewards/frontier_coverage_10": 0.017163947224617004,
"rewards/frontier_coverage_15": 0.017163947224617004,
"rewards/frontier_coverage_20": 0.017163947224617004,
"rewards/frontier_coverage_25": 0.017163947224617004,
"rewards/frontier_coverage_5": 0.017163947224617004,
"rewards/frontier_ece_reward": 0.008727512508630752,
"rewards/frontier_entropy_batch_reward": -0.26896790862083436,
"signal/accuracy_reward/centered_abs_mean": 0.1755316823720932,
"signal/accuracy_reward/group_bin_occupancy": 0.20868055555555554,
"signal/accuracy_reward/group_std_mean": 0.23276266753673552,
"signal/accuracy_reward/group_zero_std_frac": 0.3305555611848831,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0877658411860466,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0877658411860466,
"signal/advantage_abs_mean": 0.10393321365118027,
"signal/advantage_pre_scale_abs_mean": 0.10393321365118027,
"signal/advantage_pre_scale_std": 0.16393719911575316,
"signal/advantage_std": 0.16393719911575316,
"signal/brier_reward/centered_abs_mean": 0.14976280629634858,
"signal/brier_reward/group_bin_occupancy": 0.8305555555555555,
"signal/brier_reward/group_std_mean": 0.19258086383342743,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014976280741393566,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014976280741393566,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03748470433056354,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8097222222222221,
"signal/confidence_uniqueness_reward/group_std_mean": 0.059673815965652466,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00374847031198442,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00374847031198442,
"signal/format_reward/centered_abs_mean": 0.02657877579331398,
"signal/format_reward/group_bin_occupancy": 0.1482638888888889,
"signal/format_reward/group_std_mean": 0.04673202857375145,
"signal/format_reward/group_zero_std_frac": 0.8138889074325562,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01328938789665699,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01328938789665699,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014108764240518211,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6774305555555555,
"signal/frontier_aurc_reward/group_std_mean": 0.0023058691993355753,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7635955009609462e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7635955009609462e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20245161652565002,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8277777777777777,
"signal/frontier_coverage_0/group_std_mean": 0.2637217164039612,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_1/centered_abs_mean": 0.20245161652565002,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8277777777777777,
"signal/frontier_coverage_1/group_std_mean": 0.2637217164039612,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_10/centered_abs_mean": 0.20245161652565002,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8277777777777777,
"signal/frontier_coverage_10/group_std_mean": 0.2637217164039612,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_15/centered_abs_mean": 0.20245161652565002,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8277777777777777,
"signal/frontier_coverage_15/group_std_mean": 0.2637217164039612,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_20/centered_abs_mean": 0.20245161652565002,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8277777777777777,
"signal/frontier_coverage_20/group_std_mean": 0.2637217164039612,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_25/centered_abs_mean": 0.20245161652565002,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8277777777777777,
"signal/frontier_coverage_25/group_std_mean": 0.2637217164039612,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_5/centered_abs_mean": 0.20245161652565002,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8277777777777777,
"signal/frontier_coverage_5/group_std_mean": 0.2637217164039612,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025306452065706254,
"signal/frontier_ece_reward/centered_abs_mean": 0.03250915594398975,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8149305555555555,
"signal/frontier_ece_reward/group_std_mean": 0.0450954794883728,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032509156968444585,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032509156968444585,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3180006206035614,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7440972222222222,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3872752785682678,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03180006295442581,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03180006295442581,
"step": 130
},
{
"calibration/aurc": 0.21932232197247176,
"calibration/batch_distribution_entropy": 0.9258906597036723,
"calibration/batch_entropy_100bins": 0.9298288127857933,
"calibration/batch_entropy_10bins": 0.9258906597036723,
"calibration/batch_entropy_50bins": 0.9390660590681558,
"calibration/batch_uniqueness": 0.9410424613002439,
"calibration/buffer_distribution_entropy": 0.9393241003274235,
"calibration/buffer_entropy_100bins": 0.9364226957038404,
"calibration/buffer_entropy_10bins": 0.9393241003274235,
"calibration/buffer_entropy_50bins": 0.9481017899292319,
"calibration/confidence_entropy": 0.4694250466975819,
"calibration/coverage@0%": 0.02605259564249495,
"calibration/coverage@1%": 0.06719842897582828,
"calibration/coverage@10%": 0.22309573868711435,
"calibration/coverage@15%": 0.28712263899767765,
"calibration/coverage@20%": 0.4689698033900867,
"calibration/coverage@25%": 0.6534364527418945,
"calibration/coverage@30%": 0.7781708725802566,
"calibration/coverage@5%": 0.1817817623091616,
"calibration/ece": 0.13053299917973787,
"calibration/mean_confidence": 0.6416413739982313,
"calibration/prompt_uniqueness": 0.8535478696842471,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666666666675,
"completions/max_length": 3378.2,
"completions/max_terminated_length": 3378.2,
"completions/mean_length": 756.0257080078125,
"completions/mean_terminated_length": 763.93759765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 229.6,
"epoch": 0.32399595005062437,
"grad_norm": 0.0004128075379412621,
"learning_rate": 2.1987951807228917e-06,
"loss": -0.0091,
"num_tokens": 296169166.0,
"reward": 0.9857806205749512,
"reward_std": 0.12979988008737564,
"rewards/accuracy_reward": 0.6876736164093018,
"rewards/brier_reward": 0.8059556722640991,
"rewards/confidence_uniqueness_reward": 0.9373222827911377,
"rewards/format_reward": 0.9894965291023254,
"rewards/frontier_aurc_reward": -0.0013067800784483552,
"rewards/frontier_coverage_0": 0.017236491234507413,
"rewards/frontier_coverage_1": 0.017236491234507413,
"rewards/frontier_coverage_10": 0.017236491234507413,
"rewards/frontier_coverage_15": 0.017236491234507413,
"rewards/frontier_coverage_20": 0.017236491234507413,
"rewards/frontier_coverage_25": 0.017236491234507413,
"rewards/frontier_coverage_5": 0.017236491234507413,
"rewards/frontier_ece_reward": 0.009917940944433212,
"rewards/frontier_entropy_batch_reward": -0.2961589753627777,
"signal/accuracy_reward/centered_abs_mean": 0.16046006828546525,
"signal/accuracy_reward/group_bin_occupancy": 0.2020833333333333,
"signal/accuracy_reward/group_std_mean": 0.2138714611530304,
"signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08023003414273262,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08023003414273262,
"signal/advantage_abs_mean": 0.09574576169252395,
"signal/advantage_pre_scale_abs_mean": 0.09574576169252395,
"signal/advantage_pre_scale_std": 0.15280175805091858,
"signal/advantage_std": 0.15280175805091858,
"signal/brier_reward/centered_abs_mean": 0.13488745987415313,
"signal/brier_reward/group_bin_occupancy": 0.8333333333333334,
"signal/brier_reward/group_std_mean": 0.17542927265167235,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013488745875656604,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013488745875656604,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03078622967004776,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8409722222222223,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05095992609858513,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003078623116016388,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003078623116016388,
"signal/format_reward/centered_abs_mean": 0.01808268241584301,
"signal/format_reward/group_bin_occupancy": 0.14409722222222224,
"signal/format_reward/group_std_mean": 0.03564814068377018,
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009041341207921504,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009041341207921504,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015255101257935165,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.673611111111111,
"signal/frontier_aurc_reward/group_std_mean": 0.002493828348815441,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9068877008976415e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9068877008976415e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1737061321735382,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8340277777777777,
"signal/frontier_coverage_0/group_std_mean": 0.23108671605587006,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_1/centered_abs_mean": 0.1737061321735382,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8340277777777777,
"signal/frontier_coverage_1/group_std_mean": 0.23108671605587006,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_10/centered_abs_mean": 0.1737061321735382,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8340277777777777,
"signal/frontier_coverage_10/group_std_mean": 0.23108671605587006,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_15/centered_abs_mean": 0.1737061321735382,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8340277777777777,
"signal/frontier_coverage_15/group_std_mean": 0.23108671605587006,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_20/centered_abs_mean": 0.1737061321735382,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8340277777777777,
"signal/frontier_coverage_20/group_std_mean": 0.23108671605587006,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_25/centered_abs_mean": 0.1737061321735382,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8340277777777777,
"signal/frontier_coverage_25/group_std_mean": 0.23108671605587006,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_5/centered_abs_mean": 0.1737061321735382,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8340277777777777,
"signal/frontier_coverage_5/group_std_mean": 0.23108671605587006,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002171326708048582,
"signal/frontier_ece_reward/centered_abs_mean": 0.029922238364815713,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7881944444444444,
"signal/frontier_ece_reward/group_std_mean": 0.042552655935287474,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0029922238551080226,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0029922238551080226,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3231747329235077,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7611111111111111,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3919511318206787,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03231747336685657,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03231747336685657,
"step": 135
},
{
"calibration/aurc": 0.13275932023850517,
"calibration/batch_distribution_entropy": 0.9531096852111075,
"calibration/batch_entropy_100bins": 0.9454761453048437,
"calibration/batch_entropy_10bins": 0.9531096852111075,
"calibration/batch_entropy_50bins": 0.9589858393962905,
"calibration/batch_uniqueness": 0.9471837042566907,
"calibration/buffer_distribution_entropy": 0.9434576189595149,
"calibration/buffer_entropy_100bins": 0.9426846675065835,
"calibration/buffer_entropy_10bins": 0.9434576189595149,
"calibration/buffer_entropy_50bins": 0.9526372281432562,
"calibration/confidence_entropy": 0.48488417640325493,
"calibration/coverage@0%": 0.09119434414782376,
"calibration/coverage@1%": 0.09119434414782376,
"calibration/coverage@10%": 0.44083397809732333,
"calibration/coverage@15%": 0.6239463372141985,
"calibration/coverage@20%": 0.7766443894128876,
"calibration/coverage@25%": 0.8689414115682064,
"calibration/coverage@30%": 0.9499119261161872,
"calibration/coverage@5%": 0.2181709186000814,
"calibration/ece": 0.1283245177662963,
"calibration/mean_confidence": 0.592435027573537,
"calibration/prompt_uniqueness": 0.8549437105896963,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011284722222222232,
"completions/max_length": 3542.4,
"completions/max_terminated_length": 3542.4,
"completions/mean_length": 756.4568603515625,
"completions/mean_terminated_length": 765.0693115234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 255.0,
"epoch": 0.33599580005249935,
"grad_norm": 0.0003396812826395035,
"learning_rate": 2.0481927710843377e-06,
"loss": -0.0098,
"num_tokens": 307987773.0,
"reward": 0.9830217599868775,
"reward_std": 0.12660375535488128,
"rewards/accuracy_reward": 0.682812488079071,
"rewards/brier_reward": 0.7918254256248474,
"rewards/confidence_uniqueness_reward": 0.9390221238136292,
"rewards/format_reward": 0.9885416746139526,
"rewards/frontier_aurc_reward": -0.0011309069814160466,
"rewards/frontier_coverage_0": 0.0069188129156827925,
"rewards/frontier_coverage_1": 0.0069188129156827925,
"rewards/frontier_coverage_10": 0.0069188129156827925,
"rewards/frontier_coverage_15": 0.0069188129156827925,
"rewards/frontier_coverage_20": 0.0069188129156827925,
"rewards/frontier_coverage_25": 0.0069188129156827925,
"rewards/frontier_coverage_5": 0.0069188129156827925,
"rewards/frontier_ece_reward": 0.006748666008934379,
"rewards/frontier_entropy_batch_reward": -0.27006215155124663,
"signal/accuracy_reward/centered_abs_mean": 0.14768880009651184,
"signal/accuracy_reward/group_bin_occupancy": 0.1986111111111111,
"signal/accuracy_reward/group_std_mean": 0.20093624889850617,
"signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07384440004825592,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07384440004825592,
"signal/advantage_abs_mean": 0.09231802374124527,
"signal/advantage_pre_scale_abs_mean": 0.09231802374124527,
"signal/advantage_pre_scale_std": 0.14954468309879304,
"signal/advantage_std": 0.14954468309879304,
"signal/brier_reward/centered_abs_mean": 0.13755186796188354,
"signal/brier_reward/group_bin_occupancy": 0.8548611111111111,
"signal/brier_reward/group_std_mean": 0.17639783918857574,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013755187578499316,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013755187578499316,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03110237456858158,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8395833333333333,
"signal/confidence_uniqueness_reward/group_std_mean": 0.053076548129320146,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031102376524358988,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031102376524358988,
"signal/format_reward/centered_abs_mean": 0.02012803815305233,
"signal/format_reward/group_bin_occupancy": 0.14652777777777776,
"signal/format_reward/group_std_mean": 0.04009459167718887,
"signal/format_reward/group_zero_std_frac": 0.8277777910232544,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010064019076526164,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.010064019076526164,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001249980158172548,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.69375,
"signal/frontier_aurc_reward/group_std_mean": 0.0020357307279482485,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5624752268195153e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5624752268195153e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17975709438323975,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8354166666666666,
"signal/frontier_coverage_0/group_std_mean": 0.23831536173820494,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_1/centered_abs_mean": 0.17975709438323975,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8354166666666666,
"signal/frontier_coverage_1/group_std_mean": 0.23831536173820494,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_10/centered_abs_mean": 0.17975709438323975,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8354166666666666,
"signal/frontier_coverage_10/group_std_mean": 0.23831536173820494,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_15/centered_abs_mean": 0.17975709438323975,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8354166666666666,
"signal/frontier_coverage_15/group_std_mean": 0.23831536173820494,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_20/centered_abs_mean": 0.17975709438323975,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8354166666666666,
"signal/frontier_coverage_20/group_std_mean": 0.23831536173820494,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_25/centered_abs_mean": 0.17975709438323975,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8354166666666666,
"signal/frontier_coverage_25/group_std_mean": 0.23831536173820494,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_5/centered_abs_mean": 0.17975709438323975,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8354166666666666,
"signal/frontier_coverage_5/group_std_mean": 0.23831536173820494,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022469636984169482,
"signal/frontier_ece_reward/centered_abs_mean": 0.02870168685913086,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8017361111111111,
"signal/frontier_ece_reward/group_std_mean": 0.04026328325271607,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0028701687697321177,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0028701687697321177,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32329471707344054,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7614583333333333,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3934563398361206,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03232947215437889,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03232947215437889,
"step": 140
},
{
"calibration/aurc": 0.17528265336611334,
"calibration/batch_distribution_entropy": 0.9844332458404799,
"calibration/batch_entropy_100bins": 0.9646424669896382,
"calibration/batch_entropy_10bins": 0.9844332458404799,
"calibration/batch_entropy_50bins": 0.9767757755701026,
"calibration/batch_uniqueness": 0.9529407609544613,
"calibration/buffer_distribution_entropy": 0.9548874553492734,
"calibration/buffer_entropy_100bins": 0.9550800614273213,
"calibration/buffer_entropy_10bins": 0.9548874553492734,
"calibration/buffer_entropy_50bins": 0.9626071569576704,
"calibration/confidence_entropy": 0.5026660064805346,
"calibration/coverage@0%": 0.038237593621836374,
"calibration/coverage@1%": 0.038237593621836374,
"calibration/coverage@10%": 0.33759787892501547,
"calibration/coverage@15%": 0.49998634037265716,
"calibration/coverage@20%": 0.6393046017984111,
"calibration/coverage@25%": 0.7433060670275571,
"calibration/coverage@30%": 0.8282726914970058,
"calibration/coverage@5%": 0.17173594361032143,
"calibration/ece": 0.15985296898612406,
"calibration/mean_confidence": 0.5081752724560155,
"calibration/prompt_uniqueness": 0.8541487763434162,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009201388888888907,
"completions/max_length": 3174.6,
"completions/max_terminated_length": 3174.6,
"completions/mean_length": 742.3033081054688,
"completions/mean_terminated_length": 749.2716918945313,
"completions/min_length": 0.0,
"completions/min_terminated_length": 260.2,
"epoch": 0.34799565005437433,
"grad_norm": 0.0003378766414243728,
"learning_rate": 1.8975903614457832e-06,
"loss": -0.0075,
"num_tokens": 319603715.0,
"reward": 1.0006329894065857,
"reward_std": 0.11812710911035537,
"rewards/accuracy_reward": 0.710069453716278,
"rewards/brier_reward": 0.7972426533699035,
"rewards/confidence_uniqueness_reward": 0.9429156422615051,
"rewards/format_reward": 0.9907118082046509,
"rewards/frontier_aurc_reward": -0.001022504735738039,
"rewards/frontier_coverage_0": -0.00022672154009342193,
"rewards/frontier_coverage_1": -0.00022672154009342193,
"rewards/frontier_coverage_10": -0.00022672154009342193,
"rewards/frontier_coverage_15": -0.00022672154009342193,
"rewards/frontier_coverage_20": -0.00022672154009342193,
"rewards/frontier_coverage_25": -0.0004101816564798355,
"rewards/frontier_coverage_5": -0.00022672154009342193,
"rewards/frontier_ece_reward": 0.004574788874015212,
"rewards/frontier_entropy_batch_reward": -0.24196033775806428,
"signal/accuracy_reward/centered_abs_mean": 0.14015841782093047,
"signal/accuracy_reward/group_bin_occupancy": 0.19618055555555558,
"signal/accuracy_reward/group_std_mean": 0.19147344529628754,
"signal/accuracy_reward/group_zero_std_frac": 0.4305555522441864,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07007920891046523,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07007920891046523,
"signal/advantage_abs_mean": 0.0849688932299614,
"signal/advantage_pre_scale_abs_mean": 0.0849688932299614,
"signal/advantage_pre_scale_std": 0.1410199522972107,
"signal/advantage_std": 0.1410199522972107,
"signal/brier_reward/centered_abs_mean": 0.1360908180475235,
"signal/brier_reward/group_bin_occupancy": 0.8277777777777777,
"signal/brier_reward/group_std_mean": 0.1774505376815796,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013609081320464612,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013609081320464612,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027258848026394843,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8402777777777779,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0479625403881073,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027258848771452905,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027258848771452905,
"signal/format_reward/centered_abs_mean": 0.01674804724752903,
"signal/format_reward/group_bin_occupancy": 0.1451388888888889,
"signal/format_reward/group_std_mean": 0.03558648675680161,
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008374023623764515,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008374023623764515,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012428847374394536,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.684375,
"signal/frontier_aurc_reward/group_std_mean": 0.002172482665628195,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5536059800069778e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5536059800069778e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18805197179317473,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8288194444444444,
"signal/frontier_coverage_0/group_std_mean": 0.2451252043247223,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_1/centered_abs_mean": 0.18805197179317473,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8288194444444444,
"signal/frontier_coverage_1/group_std_mean": 0.2451252043247223,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_10/centered_abs_mean": 0.18805197179317473,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8288194444444444,
"signal/frontier_coverage_10/group_std_mean": 0.2451252043247223,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_15/centered_abs_mean": 0.18805197179317473,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8288194444444444,
"signal/frontier_coverage_15/group_std_mean": 0.2451252043247223,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_20/centered_abs_mean": 0.18805197179317473,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8288194444444444,
"signal/frontier_coverage_20/group_std_mean": 0.2451252043247223,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_25/centered_abs_mean": 0.17898198664188386,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8277777777777778,
"signal/frontier_coverage_25/group_std_mean": 0.23354826867580414,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002237274823710322,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002237274823710322,
"signal/frontier_coverage_5/centered_abs_mean": 0.18805197179317473,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8288194444444444,
"signal/frontier_coverage_5/group_std_mean": 0.2451252043247223,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023506497032940387,
"signal/frontier_ece_reward/centered_abs_mean": 0.026790910586714744,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8034722222222224,
"signal/frontier_ece_reward/group_std_mean": 0.03704798519611359,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026790911331772806,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026790911331772806,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3096132218837738,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7600694444444444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3817123532295227,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03096132315695286,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03096132315695286,
"step": 145
},
{
"calibration/aurc": 0.17615962878323987,
"calibration/batch_distribution_entropy": 0.9711179605405078,
"calibration/batch_entropy_100bins": 0.9558541064018309,
"calibration/batch_entropy_10bins": 0.9711179605405078,
"calibration/batch_entropy_50bins": 0.9667547349742606,
"calibration/batch_uniqueness": 0.9504408275880152,
"calibration/buffer_distribution_entropy": 0.9648408747010532,
"calibration/buffer_entropy_100bins": 0.9658663277206893,
"calibration/buffer_entropy_10bins": 0.9648408747010532,
"calibration/buffer_entropy_50bins": 0.9711379564362602,
"calibration/confidence_entropy": 0.47955049166475805,
"calibration/coverage@0%": 0.04777195843262925,
"calibration/coverage@1%": 0.04777195843262925,
"calibration/coverage@10%": 0.4098653974296026,
"calibration/coverage@15%": 0.5542765430095182,
"calibration/coverage@20%": 0.6567337578913923,
"calibration/coverage@25%": 0.7034798104684413,
"calibration/coverage@30%": 0.755544350661012,
"calibration/coverage@5%": 0.2888922753120794,
"calibration/ece": 0.18957569917445502,
"calibration/mean_confidence": 0.5315577019563095,
"calibration/prompt_uniqueness": 0.8578190975835357,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006076388888888884,
"completions/max_length": 3641.4,
"completions/max_terminated_length": 3641.4,
"completions/mean_length": 812.189501953125,
"completions/mean_terminated_length": 817.146875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 246.8,
"epoch": 0.3599955000562493,
"grad_norm": 0.0004547924909275025,
"learning_rate": 1.7469879518072292e-06,
"loss": -0.004,
"num_tokens": 332070474.0,
"reward": 0.991722309589386,
"reward_std": 0.12271568328142166,
"rewards/accuracy_reward": 0.6927951455116272,
"rewards/brier_reward": 0.8031431198120117,
"rewards/confidence_uniqueness_reward": 0.9440382122993469,
"rewards/format_reward": 0.9938367962837219,
"rewards/frontier_aurc_reward": -0.0011841925443150103,
"rewards/frontier_coverage_0": 0.011294396594166756,
"rewards/frontier_coverage_1": 0.011294396594166756,
"rewards/frontier_coverage_10": 0.011294396594166756,
"rewards/frontier_coverage_15": 0.011294396594166756,
"rewards/frontier_coverage_20": 0.012861622869968415,
"rewards/frontier_coverage_25": 0.03274488709867,
"rewards/frontier_coverage_5": 0.011294396594166756,
"rewards/frontier_ece_reward": 0.004732385440729558,
"rewards/frontier_entropy_batch_reward": -0.2804622292518616,
"signal/accuracy_reward/centered_abs_mean": 0.16096462458372116,
"signal/accuracy_reward/group_bin_occupancy": 0.20069444444444445,
"signal/accuracy_reward/group_std_mean": 0.212801730632782,
"signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08048231229186058,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08048231229186058,
"signal/advantage_abs_mean": 0.09234711825847626,
"signal/advantage_pre_scale_abs_mean": 0.09234711825847626,
"signal/advantage_pre_scale_std": 0.1431581974029541,
"signal/advantage_std": 0.1431581974029541,
"signal/brier_reward/centered_abs_mean": 0.137125688791275,
"signal/brier_reward/group_bin_occupancy": 0.8302083333333332,
"signal/brier_reward/group_std_mean": 0.17737070620059966,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013712569139897823,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013712569139897823,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023196187615394593,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8798611111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03824953958392143,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002319618733599782,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002319618733599782,
"signal/format_reward/centered_abs_mean": 0.01061740443110466,
"signal/format_reward/group_bin_occupancy": 0.13854166666666667,
"signal/format_reward/group_std_mean": 0.023116332292556763,
"signal/format_reward/group_zero_std_frac": 0.8916666746139527,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00530870221555233,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00530870221555233,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001641688891686499,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6972222222222222,
"signal/frontier_aurc_reward/group_std_mean": 0.002830854058265686,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0521112674032338e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0521112674032338e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19024072587490082,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8246527777777779,
"signal/frontier_coverage_0/group_std_mean": 0.2502481758594513,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_1/centered_abs_mean": 0.19024072587490082,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8246527777777779,
"signal/frontier_coverage_1/group_std_mean": 0.2502481758594513,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_10/centered_abs_mean": 0.19024072587490082,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8246527777777779,
"signal/frontier_coverage_10/group_std_mean": 0.2502481758594513,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_15/centered_abs_mean": 0.19024072587490082,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8246527777777779,
"signal/frontier_coverage_15/group_std_mean": 0.2502481758594513,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_20/centered_abs_mean": 0.17767036259174346,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8194444444444444,
"signal/frontier_coverage_20/group_std_mean": 0.23440926969051362,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022208794951438906,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022208794951438906,
"signal/frontier_coverage_25/centered_abs_mean": 0.08696433901786804,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8604166666666668,
"signal/frontier_coverage_25/group_std_mean": 0.11614209264516831,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010870542610064149,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010870542610064149,
"signal/frontier_coverage_5/centered_abs_mean": 0.19024072587490082,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8246527777777779,
"signal/frontier_coverage_5/group_std_mean": 0.2502481758594513,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023780090268701315,
"signal/frontier_ece_reward/centered_abs_mean": 0.025189005583524705,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8534722222222223,
"signal/frontier_ece_reward/group_std_mean": 0.033189672976732254,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002518900623545051,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002518900623545051,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33967989683151245,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7621527777777779,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4091792941093445,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033967990428209305,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033967990428209305,
"step": 150
},
{
"epoch": 0.3599955000562493,
"eval_calibration/aurc": 0.12707819189206102,
"eval_calibration/batch_distribution_entropy": 0.9112518272923418,
"eval_calibration/batch_entropy_100bins": 0.7056907814916465,
"eval_calibration/batch_entropy_10bins": 0.9112518272923418,
"eval_calibration/batch_entropy_50bins": 0.7793820453071528,
"eval_calibration/batch_uniqueness": 0.8955338195407947,
"eval_calibration/buffer_distribution_entropy": 0.9699095211172789,
"eval_calibration/buffer_entropy_100bins": 0.971712014931799,
"eval_calibration/buffer_entropy_10bins": 0.9699095211172789,
"eval_calibration/buffer_entropy_50bins": 0.9756457318826595,
"eval_calibration/confidence_entropy": 0.4789857168138119,
"eval_calibration/coverage@0%": 0.27655689964157704,
"eval_calibration/coverage@1%": 0.27655689964157704,
"eval_calibration/coverage@10%": 0.5968413978494623,
"eval_calibration/coverage@15%": 0.6931339605734766,
"eval_calibration/coverage@20%": 0.9000336021505376,
"eval_calibration/coverage@25%": 0.9475806451612904,
"eval_calibration/coverage@30%": 0.9791666666666666,
"eval_calibration/coverage@5%": 0.31440412186379924,
"eval_calibration/ece": 0.2527974328369596,
"eval_calibration/mean_confidence": 0.5693722667953784,
"eval_calibration/prompt_uniqueness": 0.8955338195407947,
"eval_completions/clipped_ratio": 0.008680555555555544,
"eval_completions/max_length": 2432.1666666666665,
"eval_completions/max_terminated_length": 2432.1666666666665,
"eval_completions/mean_length": 768.3798828125,
"eval_completions/mean_terminated_length": 775.1339111328125,
"eval_completions/min_length": 102.83333333333333,
"eval_completions/min_terminated_length": 302.0,
"eval_loss": 0.0,
"eval_num_tokens": 332070474.0,
"eval_reward": 0.9103851417700449,
"eval_reward_std": 0.22593281418085098,
"eval_rewards/accuracy_reward": 0.6857638955116272,
"eval_rewards/brier_reward": 0.8014054795106252,
"eval_rewards/confidence_uniqueness_reward": 0.886686364809672,
"eval_rewards/format_reward": 0.9895833233992258,
"eval_rewards/frontier_aurc_reward": -0.001305475743720308,
"eval_rewards/frontier_coverage_0": 0.023123869051535923,
"eval_rewards/frontier_coverage_1": 0.023123869051535923,
"eval_rewards/frontier_coverage_10": 0.023123869051535923,
"eval_rewards/frontier_coverage_15": 0.023123869051535923,
"eval_rewards/frontier_coverage_20": 0.027419194191073377,
"eval_rewards/frontier_coverage_25": 0.0541337039321661,
"eval_rewards/frontier_coverage_5": 0.023123869051535923,
"eval_rewards/frontier_ece_reward": 0.004123160368180834,
"eval_rewards/frontier_entropy_batch_reward": -0.9895833233992258,
"eval_runtime": 196.7962,
"eval_samples_per_second": 5.081,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4184027810891469,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.46373791495958966,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20920139054457346,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20920139054457346,
"eval_signal/advantage_abs_mean": 0.19404976814985275,
"eval_signal/advantage_pre_scale_abs_mean": 0.19404976814985275,
"eval_signal/advantage_pre_scale_std": 0.22494453191757202,
"eval_signal/advantage_std": 0.22494453191757202,
"eval_signal/brier_reward/centered_abs_mean": 0.18808596084515253,
"eval_signal/brier_reward/group_bin_occupancy": 0.8472222222222223,
"eval_signal/brier_reward/group_std_mean": 0.2454528883099556,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018808596457044285,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018808596457044285,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.052401296173532806,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40972222222222227,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08490791233877341,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005240129694963495,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005240129694963495,
"eval_signal/format_reward/centered_abs_mean": 0.019965277674297493,
"eval_signal/format_reward/group_bin_occupancy": 0.1597222222222222,
"eval_signal/format_reward/group_std_mean": 0.05294674697021643,
"eval_signal/format_reward/group_zero_std_frac": 0.722222238779068,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009982638837148746,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.009982638837148746,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0023627388873137534,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6041666666666666,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005246327879528205,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.953423669775172e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.953423669775172e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.29363420108954114,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9166666666666666,
"eval_signal/frontier_coverage_0/group_std_mean": 0.40715718269348145,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.29363420108954114,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9166666666666666,
"eval_signal/frontier_coverage_1/group_std_mean": 0.40715718269348145,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.29363420108954114,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9166666666666666,
"eval_signal/frontier_coverage_10/group_std_mean": 0.40715718269348145,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.29363420108954114,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9166666666666666,
"eval_signal/frontier_coverage_15/group_std_mean": 0.40715718269348145,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.21985628455877304,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9097222222222223,
"eval_signal/frontier_coverage_20/group_std_mean": 0.3140091150999069,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027482035026575127,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027482035026575127,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08635564024249713,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.920138888888889,
"eval_signal/frontier_coverage_25/group_std_mean": 0.11180556441346805,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010794455301947892,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010794455301947892,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.29363420108954114,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9166666666666666,
"eval_signal/frontier_coverage_5/group_std_mean": 0.40715718269348145,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036704275213802853,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03141581453382969,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9791666666666666,
"eval_signal/frontier_ece_reward/group_std_mean": 0.04069533385336399,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031415815077101192,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031415815077101192,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.019965277674297493,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.1597222222222222,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.05294674697021643,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0019965278139958778,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0019965278139958778,
"eval_steps_per_second": 0.03,
"step": 150
},
{
"calibration/aurc": 0.14328247622139104,
"calibration/batch_distribution_entropy": 0.968343329600269,
"calibration/batch_entropy_100bins": 0.9604368265719134,
"calibration/batch_entropy_10bins": 0.968343329600269,
"calibration/batch_entropy_50bins": 0.9692432146982082,
"calibration/batch_uniqueness": 0.9501986612953859,
"calibration/buffer_distribution_entropy": 0.9723362683790409,
"calibration/buffer_entropy_100bins": 0.9750311013963866,
"calibration/buffer_entropy_10bins": 0.9723362683790409,
"calibration/buffer_entropy_50bins": 0.9780727171466873,
"calibration/confidence_entropy": 0.4911174549743745,
"calibration/coverage@0%": 0.02847163633341644,
"calibration/coverage@1%": 0.02847163633341644,
"calibration/coverage@10%": 0.4854469320479792,
"calibration/coverage@15%": 0.6681463087620155,
"calibration/coverage@20%": 0.7713066843956897,
"calibration/coverage@25%": 0.8516330092246323,
"calibration/coverage@30%": 0.9198731266794095,
"calibration/coverage@5%": 0.17589222970165377,
"calibration/ece": 0.19967351461458113,
"calibration/mean_confidence": 0.5618897389159396,
"calibration/prompt_uniqueness": 0.8516510929037338,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.008072916666666674,
"completions/max_length": 3505.6,
"completions/max_terminated_length": 3505.6,
"completions/mean_length": 754.0788330078125,
"completions/mean_terminated_length": 760.2467651367188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 255.2,
"epoch": 0.3719953500581243,
"grad_norm": 0.0003978419699706137,
"learning_rate": 1.5963855421686747e-06,
"loss": -0.0069,
"num_tokens": 343865174.0,
"reward": 1.0113879680633544,
"reward_std": 0.12805038392543794,
"rewards/accuracy_reward": 0.7330729126930237,
"rewards/brier_reward": 0.805462658405304,
"rewards/confidence_uniqueness_reward": 0.942124092578888,
"rewards/format_reward": 0.9918402671813965,
"rewards/frontier_aurc_reward": -0.001076408103108406,
"rewards/frontier_coverage_0": -0.007807806041091681,
"rewards/frontier_coverage_1": -0.007807806041091681,
"rewards/frontier_coverage_10": -0.007807806041091681,
"rewards/frontier_coverage_15": -0.006517884694039822,
"rewards/frontier_coverage_20": 0.012862606934504583,
"rewards/frontier_coverage_25": 0.07125783488154411,
"rewards/frontier_coverage_5": -0.007807806041091681,
"rewards/frontier_ece_reward": 0.0006221902323886753,
"rewards/frontier_entropy_batch_reward": -0.26455708146095275,
"signal/accuracy_reward/centered_abs_mean": 0.16763780415058135,
"signal/accuracy_reward/group_bin_occupancy": 0.1986111111111111,
"signal/accuracy_reward/group_std_mean": 0.2156655490398407,
"signal/accuracy_reward/group_zero_std_frac": 0.4111111044883728,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08381890207529068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08381890207529068,
"signal/advantage_abs_mean": 0.09625413119792939,
"signal/advantage_pre_scale_abs_mean": 0.09625413119792939,
"signal/advantage_pre_scale_std": 0.15237030386924744,
"signal/advantage_std": 0.15237030386924744,
"signal/brier_reward/centered_abs_mean": 0.13339466452598572,
"signal/brier_reward/group_bin_occupancy": 0.8326388888888889,
"signal/brier_reward/group_std_mean": 0.17345697283744813,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013339466601610183,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013339466601610183,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026469842717051505,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8583333333333332,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04400735050439834,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026469843462109564,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026469843462109564,
"signal/format_reward/centered_abs_mean": 0.014691840298473835,
"signal/format_reward/group_bin_occupancy": 0.14097222222222222,
"signal/format_reward/group_std_mean": 0.02975890673696995,
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007345920149236918,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007345920149236918,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016013333341106772,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.701388888888889,
"signal/frontier_aurc_reward/group_std_mean": 0.0028420645277947186,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0016666530864315e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0016666530864315e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19111153185367585,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8305555555555555,
"signal/frontier_coverage_0/group_std_mean": 0.24907057583332062,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023888942785561086,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023888942785561086,
"signal/frontier_coverage_1/centered_abs_mean": 0.19111153185367585,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8305555555555555,
"signal/frontier_coverage_1/group_std_mean": 0.24907057583332062,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023888942785561086,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023888942785561086,
"signal/frontier_coverage_10/centered_abs_mean": 0.19111153185367585,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8305555555555555,
"signal/frontier_coverage_10/group_std_mean": 0.24907057583332062,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023888942785561086,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023888942785561086,
"signal/frontier_coverage_15/centered_abs_mean": 0.18777382373809814,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8302083333333334,
"signal/frontier_coverage_15/group_std_mean": 0.2448781967163086,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023471728432923555,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023471728432923555,
"signal/frontier_coverage_20/centered_abs_mean": 0.11022986769676209,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8333333333333334,
"signal/frontier_coverage_20/group_std_mean": 0.14672330319881438,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013778733555227518,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013778733555227518,
"signal/frontier_coverage_25/centered_abs_mean": 0.06623064428567886,
"signal/frontier_coverage_25/group_bin_occupancy": 0.923611111111111,
"signal/frontier_coverage_25/group_std_mean": 0.08523637503385544,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008278830791823566,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008278830791823566,
"signal/frontier_coverage_5/centered_abs_mean": 0.19111153185367585,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8305555555555555,
"signal/frontier_coverage_5/group_std_mean": 0.24907057583332062,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023888942785561086,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023888942785561086,
"signal/frontier_ece_reward/centered_abs_mean": 0.022619354724884033,
"signal/frontier_ece_reward/group_bin_occupancy": 0.892361111111111,
"signal/frontier_ece_reward/group_std_mean": 0.029080601409077644,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022619356401264667,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022619356401264667,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31687353253364564,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7545138888888889,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3865100502967834,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031687355041503905,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031687355041503905,
"step": 155
},
{
"calibration/aurc": 0.12690070084218358,
"calibration/batch_distribution_entropy": 0.9355695973242885,
"calibration/batch_entropy_100bins": 0.9378717298573725,
"calibration/batch_entropy_10bins": 0.9355695973242885,
"calibration/batch_entropy_50bins": 0.9463927534050771,
"calibration/batch_uniqueness": 0.9454156366716366,
"calibration/buffer_distribution_entropy": 0.9767544955765025,
"calibration/buffer_entropy_100bins": 0.9816366892198417,
"calibration/buffer_entropy_10bins": 0.9767544955765025,
"calibration/buffer_entropy_50bins": 0.9828938587347263,
"calibration/confidence_entropy": 0.4945956173161183,
"calibration/coverage@0%": 0.08913938989672962,
"calibration/coverage@1%": 0.08913938989672962,
"calibration/coverage@10%": 0.6179837830648094,
"calibration/coverage@15%": 0.7267282037552255,
"calibration/coverage@20%": 0.7877159740059231,
"calibration/coverage@25%": 0.8556935730627254,
"calibration/coverage@30%": 0.881283422459893,
"calibration/coverage@5%": 0.4197157681091827,
"calibration/ece": 0.15444755446701258,
"calibration/mean_confidence": 0.6324834605056432,
"calibration/prompt_uniqueness": 0.8498323599648391,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3734.0,
"completions/max_terminated_length": 3734.0,
"completions/mean_length": 752.8614624023437,
"completions/mean_terminated_length": 761.7844116210938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 233.2,
"epoch": 0.38399520005999926,
"grad_norm": 0.0003355911758262664,
"learning_rate": 1.4457831325301204e-06,
"loss": -0.0085,
"num_tokens": 355625434.0,
"reward": 0.9778003096580505,
"reward_std": 0.1277013435959816,
"rewards/accuracy_reward": 0.6736111164093017,
"rewards/brier_reward": 0.7985804080963135,
"rewards/confidence_uniqueness_reward": 0.9363638043403626,
"rewards/format_reward": 0.9882812380790711,
"rewards/frontier_aurc_reward": -0.0021719550946727394,
"rewards/frontier_coverage_0": 0.020820068009197713,
"rewards/frontier_coverage_1": 0.020820068009197713,
"rewards/frontier_coverage_10": 0.020820068009197713,
"rewards/frontier_coverage_15": 0.021759903896600007,
"rewards/frontier_coverage_20": 0.029553866386413573,
"rewards/frontier_coverage_25": 0.08654351085424423,
"rewards/frontier_coverage_5": 0.020820068009197713,
"rewards/frontier_ece_reward": 0.003279139272217435,
"rewards/frontier_entropy_batch_reward": -0.2970531314611435,
"signal/accuracy_reward/centered_abs_mean": 0.15097656100988388,
"signal/accuracy_reward/group_bin_occupancy": 0.19722222222222222,
"signal/accuracy_reward/group_std_mean": 0.20050234198570252,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07548828050494194,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07548828050494194,
"signal/advantage_abs_mean": 0.09499836862087249,
"signal/advantage_pre_scale_abs_mean": 0.09499836862087249,
"signal/advantage_pre_scale_std": 0.15216540694236755,
"signal/advantage_std": 0.15216540694236755,
"signal/brier_reward/centered_abs_mean": 0.1346014305949211,
"signal/brier_reward/group_bin_occupancy": 0.845486111111111,
"signal/brier_reward/group_std_mean": 0.17376158237457276,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013460143469274044,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013460143469274044,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03248362205922604,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.840625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05201292261481285,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032483623828738926,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032483623828738926,
"signal/format_reward/centered_abs_mean": 0.019677734375,
"signal/format_reward/group_bin_occupancy": 0.14375,
"signal/format_reward/group_std_mean": 0.03653144314885139,
"signal/format_reward/group_zero_std_frac": 0.85,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0098388671875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0098388671875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00255175766069442,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.69375,
"signal/frontier_aurc_reward/group_std_mean": 0.004304410610347986,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1896971267997285e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1896971267997285e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17283792197704315,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_0/group_std_mean": 0.22631404995918275,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021604740992188453,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021604740992188453,
"signal/frontier_coverage_1/centered_abs_mean": 0.17283792197704315,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_1/group_std_mean": 0.22631404995918275,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021604740992188453,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021604740992188453,
"signal/frontier_coverage_10/centered_abs_mean": 0.17283792197704315,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_10/group_std_mean": 0.22631404995918275,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021604740992188453,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021604740992188453,
"signal/frontier_coverage_15/centered_abs_mean": 0.1649569660425186,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8302083333333334,
"signal/frontier_coverage_15/group_std_mean": 0.21631556153297424,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020619621267542244,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020619621267542244,
"signal/frontier_coverage_20/centered_abs_mean": 0.0718404695391655,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8743055555555556,
"signal/frontier_coverage_20/group_std_mean": 0.09588805437088013,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008980058599263429,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008980058599263429,
"signal/frontier_coverage_25/centered_abs_mean": 0.07879409492015839,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9197916666666666,
"signal/frontier_coverage_25/group_std_mean": 0.10093283802270889,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009849262423813343,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009849262423813343,
"signal/frontier_coverage_5/centered_abs_mean": 0.17283792197704315,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8322916666666667,
"signal/frontier_coverage_5/group_std_mean": 0.22631404995918275,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021604740992188453,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021604740992188453,
"signal/frontier_ece_reward/centered_abs_mean": 0.02024664729833603,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8885416666666666,
"signal/frontier_ece_reward/group_std_mean": 0.026085399091243744,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020246647531166674,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020246647531166674,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3272906422615051,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7503472222222223,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3956748187541962,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03272906616330147,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03272906616330147,
"step": 160
},
{
"calibration/aurc": 0.14685283005823302,
"calibration/batch_distribution_entropy": 0.9624874588175226,
"calibration/batch_entropy_100bins": 0.9493697551644169,
"calibration/batch_entropy_10bins": 0.9624874588175226,
"calibration/batch_entropy_50bins": 0.9618907631874976,
"calibration/batch_uniqueness": 0.9484851363978098,
"calibration/buffer_distribution_entropy": 0.9804740832245967,
"calibration/buffer_entropy_100bins": 0.9871675813981946,
"calibration/buffer_entropy_10bins": 0.9804740832245967,
"calibration/buffer_entropy_50bins": 0.9869692646206021,
"calibration/confidence_entropy": 0.48256626117106355,
"calibration/coverage@0%": 0.16632372470645526,
"calibration/coverage@1%": 0.2175999027169265,
"calibration/coverage@10%": 0.4748341393546909,
"calibration/coverage@15%": 0.6035655088332543,
"calibration/coverage@20%": 0.6774566915283347,
"calibration/coverage@25%": 0.7548037091669609,
"calibration/coverage@30%": 0.8582674395252046,
"calibration/coverage@5%": 0.34612221071049537,
"calibration/ece": 0.1872612900282566,
"calibration/mean_confidence": 0.5299400808356729,
"calibration/prompt_uniqueness": 0.8486473671237833,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011545138888888907,
"completions/max_length": 3253.6,
"completions/max_terminated_length": 3253.6,
"completions/mean_length": 786.2470581054688,
"completions/mean_terminated_length": 795.4893676757813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 260.2,
"epoch": 0.39599505006187424,
"grad_norm": 0.00034029711969196796,
"learning_rate": 1.2951807228915664e-06,
"loss": -0.0074,
"num_tokens": 367822072.0,
"reward": 0.9808308362960816,
"reward_std": 0.12498695105314254,
"rewards/accuracy_reward": 0.6665798664093018,
"rewards/brier_reward": 0.8007077097892761,
"rewards/confidence_uniqueness_reward": 0.9404749751091004,
"rewards/format_reward": 0.9883680582046509,
"rewards/frontier_aurc_reward": -0.0013396847061812878,
"rewards/frontier_coverage_0": 0.030891514010727407,
"rewards/frontier_coverage_1": 0.030891514010727407,
"rewards/frontier_coverage_10": 0.030891514010727407,
"rewards/frontier_coverage_15": 0.033891326561570165,
"rewards/frontier_coverage_20": 0.04144119620323181,
"rewards/frontier_coverage_25": 0.0987599179148674,
"rewards/frontier_coverage_5": 0.030891514010727407,
"rewards/frontier_ece_reward": 0.002779871807433665,
"rewards/frontier_entropy_batch_reward": -0.2474340170621872,
"signal/accuracy_reward/centered_abs_mean": 0.14655490815639496,
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
"signal/accuracy_reward/group_std_mean": 0.19666456878185273,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07327745407819748,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07327745407819748,
"signal/advantage_abs_mean": 0.09191209226846694,
"signal/advantage_pre_scale_abs_mean": 0.09191209226846694,
"signal/advantage_pre_scale_std": 0.14856612384319307,
"signal/advantage_std": 0.14856612384319307,
"signal/brier_reward/centered_abs_mean": 0.13006339371204376,
"signal/brier_reward/group_bin_occupancy": 0.8291666666666666,
"signal/brier_reward/group_std_mean": 0.1694784790277481,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013006339780986309,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013006339780986309,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02956257574260235,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8458333333333334,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04808454513549805,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002956257527694106,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002956257527694106,
"signal/format_reward/centered_abs_mean": 0.019140625,
"signal/format_reward/group_bin_occupancy": 0.14270833333333333,
"signal/format_reward/group_std_mean": 0.03554247245192528,
"signal/format_reward/group_zero_std_frac": 0.8583333253860473,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0095703125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0095703125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015813488746061922,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.709375,
"signal/frontier_aurc_reward/group_std_mean": 0.00276105348020792,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9766861441894434e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9766861441894434e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18960587084293365,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8322916666666668,
"signal/frontier_coverage_0/group_std_mean": 0.2463166147470474,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002370073506608605,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002370073506608605,
"signal/frontier_coverage_1/centered_abs_mean": 0.18960587084293365,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8322916666666668,
"signal/frontier_coverage_1/group_std_mean": 0.2463166147470474,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002370073506608605,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002370073506608605,
"signal/frontier_coverage_10/centered_abs_mean": 0.18960587084293365,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8322916666666668,
"signal/frontier_coverage_10/group_std_mean": 0.2463166147470474,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002370073506608605,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002370073506608605,
"signal/frontier_coverage_15/centered_abs_mean": 0.16226165294647216,
"signal/frontier_coverage_15/group_bin_occupancy": 0.828125,
"signal/frontier_coverage_15/group_std_mean": 0.21179051101207733,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020282708341255785,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020282708341255785,
"signal/frontier_coverage_20/centered_abs_mean": 0.06612305119633674,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8989583333333332,
"signal/frontier_coverage_20/group_std_mean": 0.08611558228731156,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008265381446108222,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008265381446108222,
"signal/frontier_coverage_25/centered_abs_mean": 0.0762871414422989,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9041666666666666,
"signal/frontier_coverage_25/group_std_mean": 0.0986581414937973,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009535892982967198,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009535892982967198,
"signal/frontier_coverage_5/centered_abs_mean": 0.18960587084293365,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8322916666666668,
"signal/frontier_coverage_5/group_std_mean": 0.2463166147470474,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002370073506608605,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002370073506608605,
"signal/frontier_ece_reward/centered_abs_mean": 0.02045116536319256,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8788194444444445,
"signal/frontier_ece_reward/group_std_mean": 0.026219840347766876,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020451165502890943,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020451165502890943,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3110631048679352,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7541666666666667,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38286496996879577,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031106310337781905,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031106310337781905,
"step": 165
},
{
"calibration/aurc": 0.11506329568369203,
"calibration/batch_distribution_entropy": 0.9444845017520622,
"calibration/batch_entropy_100bins": 0.9396727075432679,
"calibration/batch_entropy_10bins": 0.9444845017520622,
"calibration/batch_entropy_50bins": 0.9480220782226461,
"calibration/batch_uniqueness": 0.9441397102617053,
"calibration/buffer_distribution_entropy": 0.9836986498366203,
"calibration/buffer_entropy_100bins": 0.9908642173134584,
"calibration/buffer_entropy_10bins": 0.9836986498366203,
"calibration/buffer_entropy_50bins": 0.9898192285924958,
"calibration/confidence_entropy": 0.4891014253676884,
"calibration/coverage@0%": 0.07110792272110364,
"calibration/coverage@1%": 0.14246208938777033,
"calibration/coverage@10%": 0.5021638829429665,
"calibration/coverage@15%": 0.7248016285021122,
"calibration/coverage@20%": 0.8123163068597996,
"calibration/coverage@25%": 0.8979261867360814,
"calibration/coverage@30%": 0.9481275715867081,
"calibration/coverage@5%": 0.4241632481301785,
"calibration/ece": 0.14160998775248346,
"calibration/mean_confidence": 0.6190916753688434,
"calibration/prompt_uniqueness": 0.8486469799052279,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0057291666666666515,
"completions/max_length": 3744.8,
"completions/max_terminated_length": 3744.8,
"completions/mean_length": 763.334375,
"completions/mean_terminated_length": 767.7540161132813,
"completions/min_length": 0.0,
"completions/min_terminated_length": 250.4,
"epoch": 0.4079949000637492,
"grad_norm": 0.00034656302887015045,
"learning_rate": 1.1445783132530121e-06,
"loss": -0.0039,
"num_tokens": 379704868.0,
"reward": 1.0055498480796814,
"reward_std": 0.11865353286266327,
"rewards/accuracy_reward": 0.7196180582046509,
"rewards/brier_reward": 0.8130927443504333,
"rewards/confidence_uniqueness_reward": 0.943022859096527,
"rewards/format_reward": 0.9942708253860474,
"rewards/frontier_aurc_reward": -0.001395798078738153,
"rewards/frontier_coverage_0": 0.0038308378309011458,
"rewards/frontier_coverage_1": 0.0038308378309011458,
"rewards/frontier_coverage_10": 0.0038315469399094583,
"rewards/frontier_coverage_15": 0.014347630552947521,
"rewards/frontier_coverage_20": 0.04858548492193222,
"rewards/frontier_coverage_25": 0.12771541029214858,
"rewards/frontier_coverage_5": 0.0038308378309011458,
"rewards/frontier_ece_reward": 0.00022077972535043954,
"rewards/frontier_entropy_batch_reward": -0.29585447907447815,
"signal/accuracy_reward/centered_abs_mean": 0.14562717080116272,
"signal/accuracy_reward/group_bin_occupancy": 0.19722222222222222,
"signal/accuracy_reward/group_std_mean": 0.19776785969734192,
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07281358540058136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07281358540058136,
"signal/advantage_abs_mean": 0.08716671168804169,
"signal/advantage_pre_scale_abs_mean": 0.08716671168804169,
"signal/advantage_pre_scale_std": 0.13849908411502837,
"signal/advantage_std": 0.13849908411502837,
"signal/brier_reward/centered_abs_mean": 0.12307202219963073,
"signal/brier_reward/group_bin_occupancy": 0.8347222222222221,
"signal/brier_reward/group_std_mean": 0.16019360721111298,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012307202816009522,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012307202816009522,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023052535578608514,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8871527777777777,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0379045195877552,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023052536882460116,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023052536882460116,
"signal/format_reward/centered_abs_mean": 0.010394965391606092,
"signal/format_reward/group_bin_occupancy": 0.13784722222222223,
"signal/format_reward/group_std_mean": 0.022536759078502656,
"signal/format_reward/group_zero_std_frac": 0.8972222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005197482695803046,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005197482695803046,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017953221686184406,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7086805555555555,
"signal/frontier_aurc_reward/group_std_mean": 0.0030906103551387788,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.244152765342733e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.244152765342733e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17162533700466157,
"signal/frontier_coverage_0/group_bin_occupancy": 0.829861111111111,
"signal/frontier_coverage_0/group_std_mean": 0.22700339257717134,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00214531677775085,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00214531677775085,
"signal/frontier_coverage_1/centered_abs_mean": 0.17162533700466157,
"signal/frontier_coverage_1/group_bin_occupancy": 0.829861111111111,
"signal/frontier_coverage_1/group_std_mean": 0.22700339257717134,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00214531677775085,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00214531677775085,
"signal/frontier_coverage_10/centered_abs_mean": 0.17162414491176606,
"signal/frontier_coverage_10/group_bin_occupancy": 0.829861111111111,
"signal/frontier_coverage_10/group_std_mean": 0.2270018845796585,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002145301876589656,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002145301876589656,
"signal/frontier_coverage_15/centered_abs_mean": 0.13137867748737336,
"signal/frontier_coverage_15/group_bin_occupancy": 0.828125,
"signal/frontier_coverage_15/group_std_mean": 0.1753629505634308,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00164223350584507,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00164223350584507,
"signal/frontier_coverage_20/centered_abs_mean": 0.058792735636234286,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9239583333333332,
"signal/frontier_coverage_20/group_std_mean": 0.07586074471473694,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007349092396907508,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007349092396907508,
"signal/frontier_coverage_25/centered_abs_mean": 0.08862319886684418,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9076388888888889,
"signal/frontier_coverage_25/group_std_mean": 0.11482690125703812,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011077900417149067,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011077900417149067,
"signal/frontier_coverage_5/centered_abs_mean": 0.17162533700466157,
"signal/frontier_coverage_5/group_bin_occupancy": 0.829861111111111,
"signal/frontier_coverage_5/group_std_mean": 0.22700339257717134,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00214531677775085,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00214531677775085,
"signal/frontier_ece_reward/centered_abs_mean": 0.019284069538116455,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8850694444444445,
"signal/frontier_ece_reward/group_std_mean": 0.024903832748532296,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019284070702269673,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019284070702269673,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3286241352558136,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7600694444444444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39900038838386537,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03286241367459297,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03286241367459297,
"step": 170
},
{
"calibration/aurc": 0.1167342775029246,
"calibration/batch_distribution_entropy": 0.9745598037903308,
"calibration/batch_entropy_100bins": 0.9560613502038893,
"calibration/batch_entropy_10bins": 0.9745598037903308,
"calibration/batch_entropy_50bins": 0.9679135925927282,
"calibration/batch_uniqueness": 0.9500906165718821,
"calibration/buffer_distribution_entropy": 0.9859050713779315,
"calibration/buffer_entropy_100bins": 0.9924500141789924,
"calibration/buffer_entropy_10bins": 0.9859050713779315,
"calibration/buffer_entropy_50bins": 0.9912866063537695,
"calibration/confidence_entropy": 0.4973909231670772,
"calibration/coverage@0%": 0.0202633365766245,
"calibration/coverage@1%": 0.0202633365766245,
"calibration/coverage@10%": 0.5584808730819832,
"calibration/coverage@15%": 0.7098551422535211,
"calibration/coverage@20%": 0.8395232713826571,
"calibration/coverage@25%": 0.9306284277563652,
"calibration/coverage@30%": 0.9889786840178486,
"calibration/coverage@5%": 0.2781329842712045,
"calibration/ece": 0.19580150982554967,
"calibration/mean_confidence": 0.5390505752013575,
"calibration/prompt_uniqueness": 0.8520957156663304,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010677083333333347,
"completions/max_length": 3945.2,
"completions/max_terminated_length": 3945.2,
"completions/mean_length": 797.4620727539062,
"completions/mean_terminated_length": 806.0635620117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 246.0,
"epoch": 0.4199947500656242,
"grad_norm": 0.0003526929940562695,
"learning_rate": 9.93975903614458e-07,
"loss": -0.0088,
"num_tokens": 391999599.0,
"reward": 1.001812708377838,
"reward_std": 0.1259681537747383,
"rewards/accuracy_reward": 0.7131076335906983,
"rewards/brier_reward": 0.8003608345985412,
"rewards/confidence_uniqueness_reward": 0.9405298233032227,
"rewards/format_reward": 0.9892361164093018,
"rewards/frontier_aurc_reward": -0.0010344096925109624,
"rewards/frontier_coverage_0": -0.00219659386202693,
"rewards/frontier_coverage_1": -0.00219659386202693,
"rewards/frontier_coverage_10": -0.0021957614459097385,
"rewards/frontier_coverage_15": 0.010962388198822736,
"rewards/frontier_coverage_20": 0.055274682492017745,
"rewards/frontier_coverage_25": 0.13252132833004,
"rewards/frontier_coverage_5": -0.00219659386202693,
"rewards/frontier_ece_reward": -0.0011111346306279303,
"rewards/frontier_entropy_batch_reward": -0.2569884657859802,
"signal/accuracy_reward/centered_abs_mean": 0.15221896767616272,
"signal/accuracy_reward/group_bin_occupancy": 0.20381944444444447,
"signal/accuracy_reward/group_std_mean": 0.20974666476249695,
"signal/accuracy_reward/group_zero_std_frac": 0.3694444537162781,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07610948383808136,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07610948383808136,
"signal/advantage_abs_mean": 0.09184942096471786,
"signal/advantage_pre_scale_abs_mean": 0.09184942096471786,
"signal/advantage_pre_scale_std": 0.14980422854423522,
"signal/advantage_std": 0.14980422854423522,
"signal/brier_reward/centered_abs_mean": 0.13161776959896088,
"signal/brier_reward/group_bin_occupancy": 0.8381944444444442,
"signal/brier_reward/group_std_mean": 0.17007612586021423,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013161776773631572,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013161776773631572,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02938559278845787,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8579861111111111,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04647618532180786,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002938559278845787,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002938559278845787,
"signal/format_reward/centered_abs_mean": 0.018250868283212186,
"signal/format_reward/group_bin_occupancy": 0.14131944444444447,
"signal/format_reward/group_std_mean": 0.032946827635169026,
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009125434141606093,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009125434141606093,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00137441111728549,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70625,
"signal/frontier_aurc_reward/group_std_mean": 0.0025132787879556416,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7180139366246294e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7180139366246294e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19130564630031585,
"signal/frontier_coverage_0/group_bin_occupancy": 0.825,
"signal/frontier_coverage_0/group_std_mean": 0.2510385990142822,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023913206066936256,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023913206066936256,
"signal/frontier_coverage_1/centered_abs_mean": 0.19130564630031585,
"signal/frontier_coverage_1/group_bin_occupancy": 0.825,
"signal/frontier_coverage_1/group_std_mean": 0.2510385990142822,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023913206066936256,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023913206066936256,
"signal/frontier_coverage_10/centered_abs_mean": 0.19130274057388305,
"signal/frontier_coverage_10/group_bin_occupancy": 0.825,
"signal/frontier_coverage_10/group_std_mean": 0.25103478133678436,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002391284331679344,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002391284331679344,
"signal/frontier_coverage_15/centered_abs_mean": 0.13286823630332947,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8291666666666666,
"signal/frontier_coverage_15/group_std_mean": 0.1757916271686554,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016608530189841987,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016608530189841987,
"signal/frontier_coverage_20/centered_abs_mean": 0.06132784262299538,
"signal/frontier_coverage_20/group_bin_occupancy": 0.91875,
"signal/frontier_coverage_20/group_std_mean": 0.07887878715991974,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007665980607271195,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007665980607271195,
"signal/frontier_coverage_25/centered_abs_mean": 0.08898466527462005,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9013888888888889,
"signal/frontier_coverage_25/group_std_mean": 0.11519535034894943,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011123083299025894,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011123083299025894,
"signal/frontier_coverage_5/centered_abs_mean": 0.19130564630031585,
"signal/frontier_coverage_5/group_bin_occupancy": 0.825,
"signal/frontier_coverage_5/group_std_mean": 0.2510385990142822,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023913206066936256,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023913206066936256,
"signal/frontier_ece_reward/centered_abs_mean": 0.020530903711915016,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8666666666666668,
"signal/frontier_ece_reward/group_std_mean": 0.026369891688227655,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002053090324625373,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002053090324625373,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3158855140209198,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7579861111111111,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3868151426315308,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03158855028450489,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03158855028450489,
"step": 175
},
{
"calibration/aurc": 0.0817330623111906,
"calibration/batch_distribution_entropy": 0.9711504748208586,
"calibration/batch_entropy_100bins": 0.9562899168107373,
"calibration/batch_entropy_10bins": 0.9711504748208586,
"calibration/batch_entropy_50bins": 0.9662422602252176,
"calibration/batch_uniqueness": 0.9499827534399664,
"calibration/buffer_distribution_entropy": 0.9860851734186549,
"calibration/buffer_entropy_100bins": 0.9926045511496687,
"calibration/buffer_entropy_10bins": 0.9860851734186549,
"calibration/buffer_entropy_50bins": 0.9914161779811446,
"calibration/confidence_entropy": 0.48496192333899246,
"calibration/coverage@0%": 0.1004148895158747,
"calibration/coverage@1%": 0.19970107002610285,
"calibration/coverage@10%": 0.6926593040155616,
"calibration/coverage@15%": 0.8085424051687502,
"calibration/coverage@20%": 0.8868322054716925,
"calibration/coverage@25%": 0.9545527002885805,
"calibration/coverage@30%": 0.970757180156658,
"calibration/coverage@5%": 0.5066726077108479,
"calibration/ece": 0.178456279434725,
"calibration/mean_confidence": 0.5863382511632422,
"calibration/prompt_uniqueness": 0.8510821885227383,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666666666652,
"completions/max_length": 3804.2,
"completions/max_terminated_length": 3804.2,
"completions/mean_length": 778.0964477539062,
"completions/mean_terminated_length": 786.2614990234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 262.0,
"epoch": 0.4319946000674992,
"grad_norm": 0.0003696930652949959,
"learning_rate": 8.433734939759036e-07,
"loss": -0.0084,
"num_tokens": 404063238.0,
"reward": 0.9986873149871827,
"reward_std": 0.12596749514341354,
"rewards/accuracy_reward": 0.7086805582046509,
"rewards/brier_reward": 0.8022766947746277,
"rewards/confidence_uniqueness_reward": 0.9392195105552673,
"rewards/format_reward": 0.9894965291023254,
"rewards/frontier_aurc_reward": -0.0018584353383630514,
"rewards/frontier_coverage_0": 0.00486559234559536,
"rewards/frontier_coverage_1": 0.00486559234559536,
"rewards/frontier_coverage_10": 0.0048669856041669846,
"rewards/frontier_coverage_15": 0.018902628193609418,
"rewards/frontier_coverage_20": 0.06483815237879753,
"rewards/frontier_coverage_25": 0.14728063642978667,
"rewards/frontier_coverage_5": 0.00486559234559536,
"rewards/frontier_ece_reward": -0.0008010620949789882,
"rewards/frontier_entropy_batch_reward": -0.2757860660552979,
"signal/accuracy_reward/centered_abs_mean": 0.151953125,
"signal/accuracy_reward/group_bin_occupancy": 0.19305555555555556,
"signal/accuracy_reward/group_std_mean": 0.19671571254730225,
"signal/accuracy_reward/group_zero_std_frac": 0.4555555582046509,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0759765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0759765625,
"signal/advantage_abs_mean": 0.09426534920930862,
"signal/advantage_pre_scale_abs_mean": 0.09426534920930862,
"signal/advantage_pre_scale_std": 0.1520604223012924,
"signal/advantage_std": 0.1520604223012924,
"signal/brier_reward/centered_abs_mean": 0.13269921243190766,
"signal/brier_reward/group_bin_occupancy": 0.8253472222222221,
"signal/brier_reward/group_std_mean": 0.1710539847612381,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013269922323524952,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013269922323524952,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02954910360276699,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8520833333333334,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04796513915061951,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029549104161560535,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029549104161560535,
"signal/format_reward/centered_abs_mean": 0.01773546002805233,
"signal/format_reward/group_bin_occupancy": 0.14270833333333333,
"signal/format_reward/group_std_mean": 0.0337453979998827,
"signal/format_reward/group_zero_std_frac": 0.8583333373069764,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008867730014026165,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.008867730014026165,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022768301889300345,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.68125,
"signal/frontier_aurc_reward/group_std_mean": 0.0042192541994154455,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8460378598538227e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8460378598538227e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18299897611141205,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8215277777777779,
"signal/frontier_coverage_0/group_std_mean": 0.23625112175941468,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022874871734529733,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022874871734529733,
"signal/frontier_coverage_1/centered_abs_mean": 0.18299897611141205,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8215277777777779,
"signal/frontier_coverage_1/group_std_mean": 0.23625112175941468,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022874871734529733,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022874871734529733,
"signal/frontier_coverage_10/centered_abs_mean": 0.18299424648284912,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8215277777777779,
"signal/frontier_coverage_10/group_std_mean": 0.23624541461467743,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002287428034469485,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002287428034469485,
"signal/frontier_coverage_15/centered_abs_mean": 0.10037829428911209,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8378472222222223,
"signal/frontier_coverage_15/group_std_mean": 0.13245663940906524,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012547286925837398,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012547286925837398,
"signal/frontier_coverage_20/centered_abs_mean": 0.06611849516630172,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9319444444444445,
"signal/frontier_coverage_20/group_std_mean": 0.0842194378376007,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008264812408015132,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008264812408015132,
"signal/frontier_coverage_25/centered_abs_mean": 0.10266990959644318,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8899305555555557,
"signal/frontier_coverage_25/group_std_mean": 0.1339954525232315,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012833738466724754,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012833738466724754,
"signal/frontier_coverage_5/centered_abs_mean": 0.18299897611141205,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8215277777777779,
"signal/frontier_coverage_5/group_std_mean": 0.23625112175941468,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022874871734529733,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022874871734529733,
"signal/frontier_ece_reward/centered_abs_mean": 0.019692152738571167,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8805555555555555,
"signal/frontier_ece_reward/group_std_mean": 0.024879150092601776,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001969215413555503,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001969215413555503,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3193018019199371,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7447916666666667,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38962661623954775,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031930181011557576,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031930181011557576,
"step": 180
},
{
"calibration/aurc": 0.16412042486004724,
"calibration/batch_distribution_entropy": 0.9638283593070748,
"calibration/batch_entropy_100bins": 0.952921553941341,
"calibration/batch_entropy_10bins": 0.9638283593070748,
"calibration/batch_entropy_50bins": 0.961955903293342,
"calibration/batch_uniqueness": 0.9487742638138078,
"calibration/buffer_distribution_entropy": 0.9855597957864515,
"calibration/buffer_entropy_100bins": 0.9923941439583099,
"calibration/buffer_entropy_10bins": 0.9855597957864515,
"calibration/buffer_entropy_50bins": 0.9911491227931158,
"calibration/confidence_entropy": 0.49912697432262654,
"calibration/coverage@0%": 0.015748031496062992,
"calibration/coverage@1%": 0.015748031496062992,
"calibration/coverage@10%": 0.23657545417788833,
"calibration/coverage@15%": 0.7106708432938508,
"calibration/coverage@20%": 0.8636267072360081,
"calibration/coverage@25%": 0.9296587926509187,
"calibration/coverage@30%": 0.9469816272965879,
"calibration/coverage@5%": 0.09396325459317587,
"calibration/ece": 0.21072273147614853,
"calibration/mean_confidence": 0.5744268236371306,
"calibration/prompt_uniqueness": 0.8518122594254635,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.011631944444444464,
"completions/max_length": 3182.0,
"completions/max_terminated_length": 3182.0,
"completions/mean_length": 764.2948852539063,
"completions/mean_terminated_length": 773.3405883789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 213.8,
"epoch": 0.44399445006937416,
"grad_norm": 0.00039282196667045355,
"learning_rate": 6.927710843373495e-07,
"loss": -0.0094,
"num_tokens": 415957963.0,
"reward": 0.9903581857681274,
"reward_std": 0.13288189321756363,
"rewards/accuracy_reward": 0.6887152671813965,
"rewards/brier_reward": 0.8001930832862854,
"rewards/confidence_uniqueness_reward": 0.9394657015800476,
"rewards/format_reward": 0.9883680582046509,
"rewards/frontier_aurc_reward": -0.0016249807551503182,
"rewards/frontier_coverage_0": 0.014122280664741993,
"rewards/frontier_coverage_1": 0.014122280664741993,
"rewards/frontier_coverage_10": 0.014189984847325832,
"rewards/frontier_coverage_15": 0.030132049694657326,
"rewards/frontier_coverage_20": 0.0688400574028492,
"rewards/frontier_coverage_25": 0.14342034608125687,
"rewards/frontier_coverage_5": 0.014122280664741993,
"rewards/frontier_ece_reward": -0.0014869593200273813,
"rewards/frontier_entropy_batch_reward": -0.25717237293720246,
"signal/accuracy_reward/centered_abs_mean": 0.1658420145511627,
"signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776,
"signal/accuracy_reward/group_std_mean": 0.21452577412128448,
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08292100727558135,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08292100727558135,
"signal/advantage_abs_mean": 0.09931548237800598,
"signal/advantage_pre_scale_abs_mean": 0.09931548237800598,
"signal/advantage_pre_scale_std": 0.1559781938791275,
"signal/advantage_std": 0.1559781938791275,
"signal/brier_reward/centered_abs_mean": 0.13490980863571167,
"signal/brier_reward/group_bin_occupancy": 0.8333333333333334,
"signal/brier_reward/group_std_mean": 0.1740681231021881,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013490980863571167,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013490980863571167,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0304035734385252,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.840625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05106213316321373,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030403575394302605,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030403575394302605,
"signal/format_reward/centered_abs_mean": 0.01956380195915699,
"signal/format_reward/group_bin_occupancy": 0.14513888888888887,
"signal/format_reward/group_std_mean": 0.03807148076593876,
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009781900979578494,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.009781900979578494,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019493137020617723,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.679513888888889,
"signal/frontier_aurc_reward/group_std_mean": 0.0034036038909107448,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.436642062093597e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.436642062093597e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1955573081970215,
"signal/frontier_coverage_0/group_bin_occupancy": 0.83125,
"signal/frontier_coverage_0/group_std_mean": 0.2514010012149811,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024444664362818004,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024444664362818004,
"signal/frontier_coverage_1/centered_abs_mean": 0.1955573081970215,
"signal/frontier_coverage_1/group_bin_occupancy": 0.83125,
"signal/frontier_coverage_1/group_std_mean": 0.2514010012149811,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024444664362818004,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024444664362818004,
"signal/frontier_coverage_10/centered_abs_mean": 0.19504148066043853,
"signal/frontier_coverage_10/group_bin_occupancy": 0.83125,
"signal/frontier_coverage_10/group_std_mean": 0.2507701963186264,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002438018564134836,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002438018564134836,
"signal/frontier_coverage_15/centered_abs_mean": 0.08795134276151657,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8697916666666667,
"signal/frontier_coverage_15/group_std_mean": 0.11580315828323365,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010993918171152473,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010993918171152473,
"signal/frontier_coverage_20/centered_abs_mean": 0.06347033008933067,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9170138888888889,
"signal/frontier_coverage_20/group_std_mean": 0.08160731345415115,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007933791261166334,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007933791261166334,
"signal/frontier_coverage_25/centered_abs_mean": 0.10018244087696075,
"signal/frontier_coverage_25/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_25/group_std_mean": 0.13163245618343353,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012522805249318481,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012522805249318481,
"signal/frontier_coverage_5/centered_abs_mean": 0.1955573081970215,
"signal/frontier_coverage_5/group_bin_occupancy": 0.83125,
"signal/frontier_coverage_5/group_std_mean": 0.2514010012149811,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024444664362818004,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024444664362818004,
"signal/frontier_ece_reward/centered_abs_mean": 0.020365006104111672,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8649305555555555,
"signal/frontier_ece_reward/group_std_mean": 0.025510191544890405,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00203650058247149,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00203650058247149,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3127790868282318,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7649305555555554,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38549832701683046,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03127790912985802,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03127790912985802,
"step": 185
},
{
"calibration/aurc": 0.1639593065228973,
"calibration/batch_distribution_entropy": 0.9539581142043598,
"calibration/batch_entropy_100bins": 0.9483003354272391,
"calibration/batch_entropy_10bins": 0.9539581142043598,
"calibration/batch_entropy_50bins": 0.957835409824707,
"calibration/batch_uniqueness": 0.9477393587487668,
"calibration/buffer_distribution_entropy": 0.9859487971640286,
"calibration/buffer_entropy_100bins": 0.9926121558733081,
"calibration/buffer_entropy_10bins": 0.9859487971640286,
"calibration/buffer_entropy_50bins": 0.9914107968063413,
"calibration/confidence_entropy": 0.4999114909521826,
"calibration/coverage@0%": 0.024051083448119896,
"calibration/coverage@1%": 0.024051083448119896,
"calibration/coverage@10%": 0.33487271540469976,
"calibration/coverage@15%": 0.5131284671539099,
"calibration/coverage@20%": 0.7305083738512776,
"calibration/coverage@25%": 0.9254270017406441,
"calibration/coverage@30%": 0.9780678851174935,
"calibration/coverage@5%": 0.17004451834186524,
"calibration/ece": 0.20221066968086862,
"calibration/mean_confidence": 0.5911521434855616,
"calibration/prompt_uniqueness": 0.8555883534825799,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004687499999999978,
"completions/max_length": 3312.2,
"completions/max_terminated_length": 3312.2,
"completions/mean_length": 765.2302124023438,
"completions/mean_terminated_length": 768.8353393554687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 241.0,
"epoch": 0.45599430007124914,
"grad_norm": 0.0003932048275601119,
"learning_rate": 5.421686746987952e-07,
"loss": -0.003,
"num_tokens": 427856359.0,
"reward": 1.0095869898796082,
"reward_std": 0.12467661201953888,
"rewards/accuracy_reward": 0.7232638955116272,
"rewards/brier_reward": 0.8051016926765442,
"rewards/confidence_uniqueness_reward": 0.9449184775352478,
"rewards/format_reward": 0.9951388835906982,
"rewards/frontier_aurc_reward": -0.0015021357918158173,
"rewards/frontier_coverage_0": -0.003796599945053458,
"rewards/frontier_coverage_1": -0.003796599945053458,
"rewards/frontier_coverage_10": -0.0035698655527085068,
"rewards/frontier_coverage_15": 0.02750418670475483,
"rewards/frontier_coverage_20": 0.07788380682468414,
"rewards/frontier_coverage_25": 0.15804124176502227,
"rewards/frontier_coverage_5": -0.003777299216017127,
"rewards/frontier_ece_reward": -0.004084828868508339,
"rewards/frontier_entropy_batch_reward": -0.27295289635658265,
"signal/accuracy_reward/centered_abs_mean": 0.1623914957046509,
"signal/accuracy_reward/group_bin_occupancy": 0.20243055555555553,
"signal/accuracy_reward/group_std_mean": 0.21437447667121887,
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08119574785232545,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08119574785232545,
"signal/advantage_abs_mean": 0.09301377832889557,
"signal/advantage_pre_scale_abs_mean": 0.09301377832889557,
"signal/advantage_pre_scale_std": 0.14447612464427947,
"signal/advantage_std": 0.14447612464427947,
"signal/brier_reward/centered_abs_mean": 0.1285407453775406,
"signal/brier_reward/group_bin_occupancy": 0.8503472222222221,
"signal/brier_reward/group_std_mean": 0.16547386050224305,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012854074873030186,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012854074873030186,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021638569980859758,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8777777777777779,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0373595766723156,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002163857058621943,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002163857058621943,
"signal/format_reward/centered_abs_mean": 0.00916883684694767,
"signal/format_reward/group_bin_occupancy": 0.1388888888888889,
"signal/format_reward/group_std_mean": 0.022394910082221033,
"signal/format_reward/group_zero_std_frac": 0.8888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004584418423473835,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004584418423473835,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017511979909613728,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6802083333333333,
"signal/frontier_aurc_reward/group_std_mean": 0.0030707920901477336,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.18899756873725e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.18899756873725e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19192939400672912,
"signal/frontier_coverage_0/group_bin_occupancy": 0.83125,
"signal/frontier_coverage_0/group_std_mean": 0.24938672184944152,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023991174064576628,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023991174064576628,
"signal/frontier_coverage_1/centered_abs_mean": 0.19192939400672912,
"signal/frontier_coverage_1/group_bin_occupancy": 0.83125,
"signal/frontier_coverage_1/group_std_mean": 0.24938672184944152,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023991174064576628,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023991174064576628,
"signal/frontier_coverage_10/centered_abs_mean": 0.19131710529327392,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8326388888888889,
"signal/frontier_coverage_10/group_std_mean": 0.24862921237945557,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023914637975394728,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023914637975394728,
"signal/frontier_coverage_15/centered_abs_mean": 0.07733116149902344,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8697916666666666,
"signal/frontier_coverage_15/group_std_mean": 0.10203811377286912,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009666395024396479,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009666395024396479,
"signal/frontier_coverage_20/centered_abs_mean": 0.06752799674868584,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9149305555555556,
"signal/frontier_coverage_20/group_std_mean": 0.0865270435810089,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008440999779850244,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008440999779850244,
"signal/frontier_coverage_25/centered_abs_mean": 0.1067051038146019,
"signal/frontier_coverage_25/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_25/group_std_mean": 0.13877106308937073,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013338138349354267,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013338138349354267,
"signal/frontier_coverage_5/centered_abs_mean": 0.1918783277273178,
"signal/frontier_coverage_5/group_bin_occupancy": 0.83125,
"signal/frontier_coverage_5/group_std_mean": 0.24932389259338378,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00239847912453115,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00239847912453115,
"signal/frontier_ece_reward/centered_abs_mean": 0.020100595057010652,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8600694444444444,
"signal/frontier_ece_reward/group_std_mean": 0.02535393163561821,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002010059542953968,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002010059542953968,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32644957304000854,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7652777777777778,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39766885042190553,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0326449565589428,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326449565589428,
"step": 190
},
{
"calibration/aurc": 0.1706777636504669,
"calibration/batch_distribution_entropy": 0.9790970951079817,
"calibration/batch_entropy_100bins": 0.9607017265781762,
"calibration/batch_entropy_10bins": 0.9790970951079817,
"calibration/batch_entropy_50bins": 0.9733776016996145,
"calibration/batch_uniqueness": 0.9518248437650525,
"calibration/buffer_distribution_entropy": 0.985472118915245,
"calibration/buffer_entropy_100bins": 0.9923770458774129,
"calibration/buffer_entropy_10bins": 0.985472118915245,
"calibration/buffer_entropy_50bins": 0.9911377661542394,
"calibration/confidence_entropy": 0.49873662902140675,
"calibration/coverage@0%": 0.033667852911883595,
"calibration/coverage@1%": 0.033667852911883595,
"calibration/coverage@10%": 0.3685121918209364,
"calibration/coverage@15%": 0.4559732123356472,
"calibration/coverage@20%": 0.5829903013320054,
"calibration/coverage@25%": 0.7578634163247334,
"calibration/coverage@30%": 0.9112590887234259,
"calibration/coverage@5%": 0.27687617078576193,
"calibration/ece": 0.18384669897594583,
"calibration/mean_confidence": 0.551976400559588,
"calibration/prompt_uniqueness": 0.848788807132955,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010243055555555557,
"completions/max_length": 3561.0,
"completions/max_terminated_length": 3561.0,
"completions/mean_length": 777.999560546875,
"completions/mean_terminated_length": 786.172314453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 231.8,
"epoch": 0.46799415007312406,
"grad_norm": 0.00040629622526466846,
"learning_rate": 3.91566265060241e-07,
"loss": -0.0077,
"num_tokens": 439899778.0,
"reward": 0.986942994594574,
"reward_std": 0.1280568614602089,
"rewards/accuracy_reward": 0.6803819537162781,
"rewards/brier_reward": 0.7949827075004577,
"rewards/confidence_uniqueness_reward": 0.9411714434623718,
"rewards/format_reward": 0.9897569417953491,
"rewards/frontier_aurc_reward": -0.0018961447989568115,
"rewards/frontier_coverage_0": 0.012605349812656642,
"rewards/frontier_coverage_1": 0.012605349812656642,
"rewards/frontier_coverage_10": 0.012725694989785551,
"rewards/frontier_coverage_15": 0.03210941143333912,
"rewards/frontier_coverage_20": 0.07903910428285599,
"rewards/frontier_coverage_25": 0.15094499289989471,
"rewards/frontier_coverage_5": 0.012612746376544238,
"rewards/frontier_ece_reward": -0.0022546866443008185,
"rewards/frontier_entropy_batch_reward": -0.25400757491588594,
"signal/accuracy_reward/centered_abs_mean": 0.1535264790058136,
"signal/accuracy_reward/group_bin_occupancy": 0.19861111111111113,
"signal/accuracy_reward/group_std_mean": 0.20471644699573516,
"signal/accuracy_reward/group_zero_std_frac": 0.41111111640930176,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0767632395029068,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0767632395029068,
"signal/advantage_abs_mean": 0.09638960659503937,
"signal/advantage_pre_scale_abs_mean": 0.09638960659503937,
"signal/advantage_pre_scale_std": 0.1507657587528229,
"signal/advantage_std": 0.1507657587528229,
"signal/brier_reward/centered_abs_mean": 0.1326186940073967,
"signal/brier_reward/group_bin_occupancy": 0.8368055555555556,
"signal/brier_reward/group_std_mean": 0.17211284935474397,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013261870108544826,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013261870108544826,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026984479278326035,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8788194444444445,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04143795669078827,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026984480675309895,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026984480675309895,
"signal/format_reward/centered_abs_mean": 0.01567925335839391,
"signal/format_reward/group_bin_occupancy": 0.1388888888888889,
"signal/format_reward/group_std_mean": 0.027652311697602273,
"signal/format_reward/group_zero_std_frac": 0.8888888955116272,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007839626679196954,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.007839626679196954,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023088094778358935,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6784722222222221,
"signal/frontier_aurc_reward/group_std_mean": 0.00435796077363193,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.88601171632763e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.88601171632763e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18040402829647065,
"signal/frontier_coverage_0/group_bin_occupancy": 0.840625,
"signal/frontier_coverage_0/group_std_mean": 0.23687632083892823,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022550504421815277,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022550504421815277,
"signal/frontier_coverage_1/centered_abs_mean": 0.18040402829647065,
"signal/frontier_coverage_1/group_bin_occupancy": 0.840625,
"signal/frontier_coverage_1/group_std_mean": 0.23687632083892823,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022550504421815277,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022550504421815277,
"signal/frontier_coverage_10/centered_abs_mean": 0.17983727753162385,
"signal/frontier_coverage_10/group_bin_occupancy": 0.840625,
"signal/frontier_coverage_10/group_std_mean": 0.23616442382335662,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022479661041870714,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022479661041870714,
"signal/frontier_coverage_15/centered_abs_mean": 0.06941870003938674,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8961805555555555,
"signal/frontier_coverage_15/group_std_mean": 0.09146715700626373,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008677337900735438,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008677337900735438,
"signal/frontier_coverage_20/centered_abs_mean": 0.06973416805267334,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9184027777777779,
"signal/frontier_coverage_20/group_std_mean": 0.09039737284183502,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008716771146282554,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008716771146282554,
"signal/frontier_coverage_25/centered_abs_mean": 0.11032099574804306,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8857638888888889,
"signal/frontier_coverage_25/group_std_mean": 0.14418871700763702,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013790124328806996,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013790124328806996,
"signal/frontier_coverage_5/centered_abs_mean": 0.18039599657058716,
"signal/frontier_coverage_5/group_bin_occupancy": 0.840625,
"signal/frontier_coverage_5/group_std_mean": 0.23686636984348297,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022549499990418553,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022549499990418553,
"signal/frontier_ece_reward/centered_abs_mean": 0.018910813704133035,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8722222222222221,
"signal/frontier_ece_reward/group_std_mean": 0.024173206835985183,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018910813611000775,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018910813611000775,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.312946754693985,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7586805555555556,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3856872797012329,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031294677406549454,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031294677406549454,
"step": 195
},
{
"calibration/aurc": 0.13533203632938168,
"calibration/batch_distribution_entropy": 0.9504027952989377,
"calibration/batch_entropy_100bins": 0.9457547318105052,
"calibration/batch_entropy_10bins": 0.9504027952989377,
"calibration/batch_entropy_50bins": 0.9554350823985063,
"calibration/batch_uniqueness": 0.9465724141007226,
"calibration/buffer_distribution_entropy": 0.9853970155873112,
"calibration/buffer_entropy_100bins": 0.9923398769087163,
"calibration/buffer_entropy_10bins": 0.9853970155873112,
"calibration/buffer_entropy_50bins": 0.9911092633839509,
"calibration/confidence_entropy": 0.5118639401115025,
"calibration/coverage@0%": 0.02978339992651786,
"calibration/coverage@1%": 0.02978339992651786,
"calibration/coverage@10%": 0.4239939262423073,
"calibration/coverage@15%": 0.5175246279966933,
"calibration/coverage@20%": 0.8703728357215027,
"calibration/coverage@25%": 0.9216995614035088,
"calibration/coverage@30%": 0.9515789473684212,
"calibration/coverage@5%": 0.31793581220721967,
"calibration/ece": 0.1757973301190792,
"calibration/mean_confidence": 0.6121883324285666,
"calibration/prompt_uniqueness": 0.8583440433333885,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006597222222222232,
"completions/max_length": 3650.8,
"completions/max_terminated_length": 3650.8,
"completions/mean_length": 752.8812622070312,
"completions/mean_terminated_length": 757.93525390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 244.2,
"epoch": 0.47999400007499904,
"grad_norm": 0.00041422457434237003,
"learning_rate": 2.409638554216868e-07,
"loss": -0.0054,
"num_tokens": 451640778.0,
"reward": 1.0022491931915283,
"reward_std": 0.12217179834842681,
"rewards/accuracy_reward": 0.7116319417953492,
"rewards/brier_reward": 0.8065296888351441,
"rewards/confidence_uniqueness_reward": 0.9426613330841065,
"rewards/format_reward": 0.9933159708976745,
"rewards/frontier_aurc_reward": -0.0018987123388797046,
"rewards/frontier_coverage_0": 0.001754038338549435,
"rewards/frontier_coverage_1": 0.001754038338549435,
"rewards/frontier_coverage_10": 0.0023555623716674744,
"rewards/frontier_coverage_15": 0.03378410004079342,
"rewards/frontier_coverage_20": 0.0899100884795189,
"rewards/frontier_coverage_25": 0.16996320486068725,
"rewards/frontier_coverage_5": 0.001754038338549435,
"rewards/frontier_ece_reward": -0.003993240976706147,
"rewards/frontier_entropy_batch_reward": -0.28486764430999756,
"signal/accuracy_reward/centered_abs_mean": 0.1452473983168602,
"signal/accuracy_reward/group_bin_occupancy": 0.19479166666666667,
"signal/accuracy_reward/group_std_mean": 0.19380164742469788,
"signal/accuracy_reward/group_zero_std_frac": 0.44166666865348814,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0726236991584301,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0726236991584301,
"signal/advantage_abs_mean": 0.09101023375988007,
"signal/advantage_pre_scale_abs_mean": 0.09101023375988007,
"signal/advantage_pre_scale_std": 0.14412825107574462,
"signal/advantage_std": 0.14412825107574462,
"signal/brier_reward/centered_abs_mean": 0.12795960605144502,
"signal/brier_reward/group_bin_occupancy": 0.8381944444444445,
"signal/brier_reward/group_std_mean": 0.16680757701396942,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012795961275696755,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012795961275696755,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02437374070286751,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8774305555555555,
"signal/confidence_uniqueness_reward/group_std_mean": 0.039960439503192904,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002437374135479331,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002437374135479331,
"signal/format_reward/centered_abs_mean": 0.011984592024236917,
"signal/format_reward/group_bin_occupancy": 0.1388888888888889,
"signal/format_reward/group_std_mean": 0.024961976706981658,
"signal/format_reward/group_zero_std_frac": 0.8888889074325561,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005992296012118458,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005992296012118458,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022553114220499994,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6885416666666668,
"signal/frontier_aurc_reward/group_std_mean": 0.003917370270937681,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.81913933577016e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.81913933577016e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1716437578201294,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8284722222222223,
"signal/frontier_coverage_0/group_std_mean": 0.22425118684768677,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021455470705404878,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021455470705404878,
"signal/frontier_coverage_1/centered_abs_mean": 0.1716437578201294,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8284722222222223,
"signal/frontier_coverage_1/group_std_mean": 0.22425118684768677,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021455470705404878,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021455470705404878,
"signal/frontier_coverage_10/centered_abs_mean": 0.16941776275634765,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8256944444444445,
"signal/frontier_coverage_10/group_std_mean": 0.22150866985321044,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021177220391109587,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021177220391109587,
"signal/frontier_coverage_15/centered_abs_mean": 0.06261588633060455,
"signal/frontier_coverage_15/group_bin_occupancy": 0.898263888888889,
"signal/frontier_coverage_15/group_std_mean": 0.08280375897884369,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007826985907740891,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007826985907740891,
"signal/frontier_coverage_20/centered_abs_mean": 0.07480958104133606,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9163194444444445,
"signal/frontier_coverage_20/group_std_mean": 0.09603887796401978,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009351198212243616,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009351198212243616,
"signal/frontier_coverage_25/centered_abs_mean": 0.11988835930824279,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8927083333333334,
"signal/frontier_coverage_25/group_std_mean": 0.1552154928445816,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014986045192927123,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014986045192927123,
"signal/frontier_coverage_5/centered_abs_mean": 0.1716437578201294,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8284722222222223,
"signal/frontier_coverage_5/group_std_mean": 0.22425118684768677,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021455470705404878,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021455470705404878,
"signal/frontier_ece_reward/centered_abs_mean": 0.01908930353820324,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8739583333333334,
"signal/frontier_ece_reward/group_std_mean": 0.023905428871512414,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019089303910732268,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019089303910732268,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.328853166103363,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39871172308921815,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03288531787693501,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03288531787693501,
"step": 200
},
{
"epoch": 0.47999400007499904,
"eval_calibration/aurc": 0.14756764044678702,
"eval_calibration/batch_distribution_entropy": 0.9258539583207644,
"eval_calibration/batch_entropy_100bins": 0.705329766878307,
"eval_calibration/batch_entropy_10bins": 0.9258539583207644,
"eval_calibration/batch_entropy_50bins": 0.7811867066286015,
"eval_calibration/batch_uniqueness": 0.8942650071540063,
"eval_calibration/buffer_distribution_entropy": 0.9848900187220755,
"eval_calibration/buffer_entropy_100bins": 0.9920892460675286,
"eval_calibration/buffer_entropy_10bins": 0.9848900187220755,
"eval_calibration/buffer_entropy_50bins": 0.9908194394047567,
"eval_calibration/confidence_entropy": 0.48096033335039917,
"eval_calibration/coverage@0%": 0.25739247311827956,
"eval_calibration/coverage@1%": 0.25739247311827956,
"eval_calibration/coverage@10%": 0.4065860215053763,
"eval_calibration/coverage@15%": 0.6117271505376344,
"eval_calibration/coverage@20%": 0.70127688172043,
"eval_calibration/coverage@25%": 0.936491935483871,
"eval_calibration/coverage@30%": 0.9946236559139785,
"eval_calibration/coverage@5%": 0.25739247311827956,
"eval_calibration/ece": 0.22866607442963627,
"eval_calibration/mean_confidence": 0.5914768839699495,
"eval_calibration/prompt_uniqueness": 0.8942650071540063,
"eval_completions/clipped_ratio": 0.00434027777777779,
"eval_completions/max_length": 2311.6666666666665,
"eval_completions/max_terminated_length": 2311.6666666666665,
"eval_completions/mean_length": 765.7859497070312,
"eval_completions/mean_terminated_length": 769.0821126302084,
"eval_completions/min_length": 147.33333333333334,
"eval_completions/min_terminated_length": 270.8333333333333,
"eval_loss": 0.0,
"eval_num_tokens": 451640778.0,
"eval_reward": 0.9159158170223236,
"eval_reward_std": 0.2335003837943077,
"eval_rewards/accuracy_reward": 0.6875,
"eval_rewards/brier_reward": 0.8085997502009074,
"eval_rewards/confidence_uniqueness_reward": 0.8914510905742645,
"eval_rewards/format_reward": 0.9939236144224802,
"eval_rewards/frontier_aurc_reward": -0.0018178720492869616,
"eval_rewards/frontier_coverage_0": 0.019008846589713357,
"eval_rewards/frontier_coverage_1": 0.019008846589713357,
"eval_rewards/frontier_coverage_10": 0.01966132320618878,
"eval_rewards/frontier_coverage_15": 0.039734075466791786,
"eval_rewards/frontier_coverage_20": 0.09554279471437137,
"eval_rewards/frontier_coverage_25": 0.17320440957943598,
"eval_rewards/frontier_coverage_5": 0.019010902382433414,
"eval_rewards/frontier_ece_reward": -0.0020065721619175747,
"eval_rewards/frontier_entropy_batch_reward": -0.9939236144224802,
"eval_runtime": 181.543,
"eval_samples_per_second": 5.508,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4176432291666667,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.463163028160731,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20882161458333334,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20882161458333334,
"eval_signal/advantage_abs_mean": 0.20389158527056375,
"eval_signal/advantage_pre_scale_abs_mean": 0.20389158527056375,
"eval_signal/advantage_pre_scale_std": 0.23148142298062643,
"eval_signal/advantage_std": 0.23148142298062643,
"eval_signal/brier_reward/centered_abs_mean": 0.17776375015576681,
"eval_signal/brier_reward/group_bin_occupancy": 0.8645833333333334,
"eval_signal/brier_reward/group_std_mean": 0.227944349249204,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01777637532601754,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01777637532601754,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04837593622505665,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40624999999999994,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0713024524350961,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004837593606983622,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004837593606983622,
"eval_signal/format_reward/centered_abs_mean": 0.01177300326526165,
"eval_signal/format_reward/group_bin_occupancy": 0.14930555555555555,
"eval_signal/format_reward/group_std_mean": 0.034373246133327484,
"eval_signal/format_reward/group_zero_std_frac": 0.8055555721124014,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.005886501632630825,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.005886501632630825,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030702337777862945,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5902777777777778,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006556036416441202,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.83779224648606e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.83779224648606e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.26045608272155124,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9444444444444443,
"eval_signal/frontier_coverage_0/group_std_mean": 0.3647051453590393,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003255701197000841,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003255701197000841,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.26045608272155124,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9444444444444443,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3647051453590393,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003255701197000841,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003255701197000841,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.25206231077512103,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9444444444444443,
"eval_signal/frontier_coverage_10/group_std_mean": 0.35428506632645923,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003150779055431485,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003150779055431485,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.07933254291613896,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9097222222222222,
"eval_signal/frontier_coverage_15/group_std_mean": 0.11005314812064171,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009916568330178659,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009916568330178659,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.12251939376195271,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9409722222222223,
"eval_signal/frontier_coverage_20/group_std_mean": 0.15402295937140784,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015314924918736021,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015314924918736021,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.22701545556386313,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.2766217887401581,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028376932411144176,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028376932411144176,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2604496479034424,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9444444444444443,
"eval_signal/frontier_coverage_5/group_std_mean": 0.36469681064287823,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032556206764032445,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032556206764032445,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0270277534921964,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9409722222222222,
"eval_signal/frontier_ece_reward/group_std_mean": 0.034523426865537964,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002702775450112919,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002702775450112919,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.01177300326526165,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.14930555555555555,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.034373246133327484,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8055555721124014,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0011773003886143367,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0011773003886143367,
"eval_steps_per_second": 0.033,
"step": 200
},
{
"calibration/aurc": 0.1652033711867475,
"calibration/batch_distribution_entropy": 0.9384595336180503,
"calibration/batch_entropy_100bins": 0.9400339651108552,
"calibration/batch_entropy_10bins": 0.9384595336180503,
"calibration/batch_entropy_50bins": 0.9475336542177166,
"calibration/batch_uniqueness": 0.9436695127060112,
"calibration/buffer_distribution_entropy": 0.9847586785563566,
"calibration/buffer_entropy_100bins": 0.9920408823055707,
"calibration/buffer_entropy_10bins": 0.9847586785563566,
"calibration/buffer_entropy_50bins": 0.9907514046533752,
"calibration/confidence_entropy": 0.4783915000591916,
"calibration/coverage@0%": 0.01204360634059349,
"calibration/coverage@1%": 0.01204360634059349,
"calibration/coverage@10%": 0.2860696019089487,
"calibration/coverage@15%": 0.4156604098828991,
"calibration/coverage@20%": 0.8064645320617825,
"calibration/coverage@25%": 0.9036748593935308,
"calibration/coverage@30%": 0.9464806869956538,
"calibration/coverage@5%": 0.08500383239346383,
"calibration/ece": 0.12629128286363533,
"calibration/mean_confidence": 0.6265111829809916,
"calibration/prompt_uniqueness": 0.8555825096487398,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.006076388888888884,
"completions/max_length": 3488.0,
"completions/max_terminated_length": 3488.0,
"completions/mean_length": 771.4147583007813,
"completions/mean_terminated_length": 776.1726318359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 231.6,
"epoch": 0.491993850076874,
"grad_norm": 0.00033893538056872785,
"learning_rate": 9.036144578313253e-08,
"loss": -0.0041,
"num_tokens": 463593428.0,
"reward": 1.0165271043777466,
"reward_std": 0.12039815932512284,
"rewards/accuracy_reward": 0.7393229007720947,
"rewards/brier_reward": 0.8207941651344299,
"rewards/confidence_uniqueness_reward": 0.9416178464889526,
"rewards/format_reward": 0.9938368082046509,
"rewards/frontier_aurc_reward": -0.0013881307444535197,
"rewards/frontier_coverage_0": 0.0027549955993890762,
"rewards/frontier_coverage_1": 0.0027549955993890762,
"rewards/frontier_coverage_10": 0.005501348234247416,
"rewards/frontier_coverage_15": 0.04494566917419433,
"rewards/frontier_coverage_20": 0.11320073753595353,
"rewards/frontier_coverage_25": 0.20637257397174835,
"rewards/frontier_coverage_5": 0.0027576935943216087,
"rewards/frontier_ece_reward": -0.004469462623819709,
"rewards/frontier_entropy_batch_reward": -0.30558276176452637,
"signal/accuracy_reward/centered_abs_mean": 0.14855143129825593,
"signal/accuracy_reward/group_bin_occupancy": 0.19895833333333332,
"signal/accuracy_reward/group_std_mean": 0.2020564168691635,
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07427571564912797,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07427571564912797,
"signal/advantage_abs_mean": 0.08845392167568207,
"signal/advantage_pre_scale_abs_mean": 0.08845392167568207,
"signal/advantage_pre_scale_std": 0.14208076894283295,
"signal/advantage_std": 0.14208076894283295,
"signal/brier_reward/centered_abs_mean": 0.12419998794794082,
"signal/brier_reward/group_bin_occupancy": 0.8274305555555556,
"signal/brier_reward/group_std_mean": 0.1606125205755234,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01241999827325344,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01241999827325344,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02462100312113762,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8798611111111112,
"signal/confidence_uniqueness_reward/group_std_mean": 0.039499569684267044,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024621004471555353,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024621004471555353,
"signal/format_reward/centered_abs_mean": 0.0109103734488599,
"signal/format_reward/group_bin_occupancy": 0.13784722222222223,
"signal/format_reward/group_std_mean": 0.02283493857830763,
"signal/format_reward/group_zero_std_frac": 0.8972222328186035,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00545518672442995,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00545518672442995,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016165346140041947,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6725694444444444,
"signal/frontier_aurc_reward/group_std_mean": 0.0028643927304074167,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0206683984724805e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0206683984724805e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17496979832649232,
"signal/frontier_coverage_0/group_bin_occupancy": 0.814236111111111,
"signal/frontier_coverage_0/group_std_mean": 0.2312620609998703,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021871224977076053,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021871224977076053,
"signal/frontier_coverage_1/centered_abs_mean": 0.17496979832649232,
"signal/frontier_coverage_1/group_bin_occupancy": 0.814236111111111,
"signal/frontier_coverage_1/group_std_mean": 0.2312620609998703,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021871224977076053,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021871224977076053,
"signal/frontier_coverage_10/centered_abs_mean": 0.16565968990325927,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8069444444444445,
"signal/frontier_coverage_10/group_std_mean": 0.21950293183326722,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002070746128447354,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002070746128447354,
"signal/frontier_coverage_15/centered_abs_mean": 0.0636880062520504,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9159722222222222,
"signal/frontier_coverage_15/group_std_mean": 0.08212085962295532,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007961000897921622,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007961000897921622,
"signal/frontier_coverage_20/centered_abs_mean": 0.07905065417289733,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9180555555555555,
"signal/frontier_coverage_20/group_std_mean": 0.1002379298210144,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009881331818178297,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009881331818178297,
"signal/frontier_coverage_25/centered_abs_mean": 0.12090887576341629,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8913194444444444,
"signal/frontier_coverage_25/group_std_mean": 0.15563631057739258,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015113609610125423,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015113609610125423,
"signal/frontier_coverage_5/centered_abs_mean": 0.17495956420898437,
"signal/frontier_coverage_5/group_bin_occupancy": 0.814236111111111,
"signal/frontier_coverage_5/group_std_mean": 0.2312490999698639,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021869946271181107,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021869946271181107,
"signal/frontier_ece_reward/centered_abs_mean": 0.01855614297091961,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8444444444444444,
"signal/frontier_ece_reward/group_std_mean": 0.023571832850575448,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001855614292435348,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001855614292435348,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3295231759548187,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39651084542274473,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03295231983065605,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03295231983065605,
"step": 205
},
{
"calibration/aurc": 0.07742026917061483,
"calibration/batch_distribution_entropy": 0.9536937703254748,
"calibration/batch_entropy_100bins": 0.9492115543702568,
"calibration/batch_entropy_10bins": 0.9536937703254748,
"calibration/batch_entropy_50bins": 0.9579820664320953,
"calibration/batch_uniqueness": 0.9475172437688495,
"calibration/buffer_distribution_entropy": 0.9842747561446203,
"calibration/buffer_entropy_100bins": 0.9917916027464883,
"calibration/buffer_entropy_10bins": 0.9842747561446203,
"calibration/buffer_entropy_50bins": 0.9904588388299497,
"calibration/confidence_entropy": 0.49376543330122047,
"calibration/coverage@0%": 0.09232020559648955,
"calibration/coverage@1%": 0.09232020559648955,
"calibration/coverage@10%": 0.7053527082507439,
"calibration/coverage@15%": 0.8343790867542458,
"calibration/coverage@20%": 0.9407132995229177,
"calibration/coverage@25%": 0.9851657940663175,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.5143739832952852,
"calibration/ece": 0.15782109592263618,
"calibration/mean_confidence": 0.6163736283766634,
"calibration/prompt_uniqueness": 0.8510770487877534,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004484953703703683,
"completions/max_length": 3489.0,
"completions/max_terminated_length": 3489.0,
"completions/mean_length": 780.5936075846354,
"completions/mean_terminated_length": 784.121337890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 227.0,
"epoch": 0.49919376007799904,
"num_tokens": 470854939.0,
"reward": 0.9987632036209106,
"reward_std": 0.11778175334135692,
"rewards/accuracy_reward": 0.7009548544883728,
"rewards/brier_reward": 0.8073068459828695,
"rewards/confidence_uniqueness_reward": 0.9438951214154562,
"rewards/format_reward": 0.9955150485038757,
"rewards/frontier_aurc_reward": -0.001374229167898496,
"rewards/frontier_coverage_0": 0.009927504695951939,
"rewards/frontier_coverage_1": 0.009927504695951939,
"rewards/frontier_coverage_10": 0.011239175374309221,
"rewards/frontier_coverage_15": 0.04281615341703097,
"rewards/frontier_coverage_20": 0.10291850566864014,
"rewards/frontier_coverage_25": 0.1833156297604243,
"rewards/frontier_coverage_5": 0.009930253960192204,
"rewards/frontier_ece_reward": -0.003355810030673941,
"rewards/frontier_entropy_batch_reward": -0.2886508007844289,
"signal/accuracy_reward/centered_abs_mean": 0.14790401111046472,
"signal/accuracy_reward/group_bin_occupancy": 0.19618055555555555,
"signal/accuracy_reward/group_std_mean": 0.19730964303016663,
"signal/accuracy_reward/group_zero_std_frac": 0.4305555621782939,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07395200555523236,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07395200555523236,
"signal/advantage_abs_mean": 0.08715297033389409,
"signal/advantage_pre_scale_abs_mean": 0.08715297033389409,
"signal/advantage_pre_scale_std": 0.1391968379418055,
"signal/advantage_std": 0.1391968379418055,
"signal/brier_reward/centered_abs_mean": 0.1294451653957367,
"signal/brier_reward/group_bin_occupancy": 0.84375,
"signal/brier_reward/group_std_mean": 0.16660779217878977,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012944516415397326,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012944516415397326,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02156602032482624,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8703703703703703,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03804971898595492,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021566021411369243,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021566021411369243,
"signal/format_reward/centered_abs_mean": 0.008563006296753883,
"signal/format_reward/group_bin_occupancy": 0.1394675925925926,
"signal/format_reward/group_std_mean": 0.02229359808067481,
"signal/format_reward/group_zero_std_frac": 0.8842592835426331,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004281503148376942,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004281503148376942,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017085896106436849,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6903935185185185,
"signal/frontier_aurc_reward/group_std_mean": 0.0031021018512547016,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1357368799120497e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1357368799120497e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18459606170654297,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8287037037037037,
"signal/frontier_coverage_0/group_std_mean": 0.24103171626726785,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023074508644640446,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023074508644640446,
"signal/frontier_coverage_1/centered_abs_mean": 0.18459606170654297,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8287037037037037,
"signal/frontier_coverage_1/group_std_mean": 0.24103171626726785,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023074508644640446,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023074508644640446,
"signal/frontier_coverage_10/centered_abs_mean": 0.17488996187845865,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8275462962962963,
"signal/frontier_coverage_10/group_std_mean": 0.22865218917528787,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021861245234807334,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021861245234807334,
"signal/frontier_coverage_15/centered_abs_mean": 0.06289837509393692,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9097222222222223,
"signal/frontier_coverage_15/group_std_mean": 0.08136197924613953,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007862297352403402,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007862297352403402,
"signal/frontier_coverage_20/centered_abs_mean": 0.07549887150526047,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9224537037037037,
"signal/frontier_coverage_20/group_std_mean": 0.09611385067303975,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009437358821742237,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009437358821742237,
"signal/frontier_coverage_25/centered_abs_mean": 0.11583262433608373,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8883101851851851,
"signal/frontier_coverage_25/group_std_mean": 0.14927472174167633,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014479078430061538,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014479078430061538,
"signal/frontier_coverage_5/centered_abs_mean": 0.18458310763041177,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8287037037037037,
"signal/frontier_coverage_5/group_std_mean": 0.24101491769154867,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002307288891946276,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002307288891946276,
"signal/frontier_ece_reward/centered_abs_mean": 0.01901736669242382,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8396990740740741,
"signal/frontier_ece_reward/group_std_mean": 0.024036493773261707,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019017367934187253,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019017367934187253,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3173823555310567,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7534722222222222,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3859201769034068,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031738235925634704,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031738235925634704,
"step": 208,
"total_flos": 0.0,
"train_loss": -0.01055129385685387,
"train_runtime": 40426.5299,
"train_samples_per_second": 0.371,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 208,
"num_input_tokens_seen": 470854939,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}