Files
RLCR-v4-ks-uniqueness-hotpo…/trainer_state.json
ModelHub XC 18ec7018f8 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-hotpot-aliases
Source: Original Platform
2026-05-10 05:46:45 +08:00

8566 lines
520 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.6365829417675869,
"calibration/batch_distribution_entropy": 0.6585998493860218,
"calibration/confidence_entropy": 0.34533354478171396,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.49700134759958975,
"calibration/mean_confidence": 0.7904389820626345,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1493.8,
"completions/mean_length": 272.11005859375,
"completions/mean_terminated_length": 222.85674438476562,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.09750684350728989,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0951,
"num_tokens": 17630439.0,
"reward": 0.6584686756134033,
"reward_std": 0.4942072808742523,
"rewards/accuracy_reward": 0.26572265625,
"rewards/brier_reward": 0.40714969038963317,
"rewards/confidence_uniqueness_reward": 0.4840377986431122,
"rewards/format_reward": 0.6783203125,
"rewards/frontier_aurc_reward": 0.2998352885246277,
"rewards/frontier_coverage_1": 0.2998352885246277,
"rewards/frontier_coverage_10": 0.2998352885246277,
"rewards/frontier_coverage_15": 0.2998352885246277,
"rewards/frontier_coverage_20": 0.2998352885246277,
"rewards/frontier_coverage_25": 0.2998352885246277,
"rewards/frontier_coverage_5": 0.2998352885246277,
"rewards/frontier_ece_reward": 0.2998352885246277,
"signal/accuracy_reward/centered_abs_mean": 0.275433349609375,
"signal/accuracy_reward/group_std_mean": 0.31715606451034545,
"signal/accuracy_reward/group_zero_std_frac": 0.25625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1377166748046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1377166748046875,
"signal/advantage_abs_mean": 0.42427608370780945,
"signal/advantage_pre_scale_abs_mean": 0.42427608370780945,
"signal/advantage_pre_scale_std": 0.5014617919921875,
"signal/advantage_std": 0.5014617919921875,
"signal/brier_reward/centered_abs_mean": 0.3354613959789276,
"signal/brier_reward/group_std_mean": 0.3796173930168152,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04193267449736595,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.04193267449736595,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2990226149559021,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3495690166950226,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03737782686948776,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03737782686948776,
"signal/format_reward/centered_abs_mean": 0.4059326171875,
"signal/format_reward/group_std_mean": 0.45510302782058715,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.20296630859375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.20296630859375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.292724135518074,
"signal/frontier_aurc_reward/group_std_mean": 0.3436519503593445,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_1/centered_abs_mean": 0.292724135518074,
"signal/frontier_coverage_1/group_std_mean": 0.3436519503593445,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_10/centered_abs_mean": 0.292724135518074,
"signal/frontier_coverage_10/group_std_mean": 0.3436519503593445,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_15/centered_abs_mean": 0.292724135518074,
"signal/frontier_coverage_15/group_std_mean": 0.3436519503593445,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_20/centered_abs_mean": 0.292724135518074,
"signal/frontier_coverage_20/group_std_mean": 0.3436519503593445,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_25/centered_abs_mean": 0.292724135518074,
"signal/frontier_coverage_25/group_std_mean": 0.3436519503593445,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_5/centered_abs_mean": 0.292724135518074,
"signal/frontier_coverage_5/group_std_mean": 0.3436519503593445,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005239761807024479,
"signal/frontier_ece_reward/centered_abs_mean": 0.292724135518074,
"signal/frontier_ece_reward/group_std_mean": 0.3436519503593445,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03659051693975925,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03659051693975925,
"step": 5
},
{
"calibration/aurc": 0.6490185669552773,
"calibration/batch_distribution_entropy": 0.6521431833905986,
"calibration/confidence_entropy": 0.34190821981819597,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5192164657110803,
"calibration/mean_confidence": 0.793153970465858,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0359375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1499.6,
"completions/mean_length": 262.6380859375,
"completions/mean_terminated_length": 215.18854370117188,
"completions/min_length": 1.8,
"completions/min_terminated_length": 1.8,
"epoch": 0.032,
"grad_norm": 0.043926581740379333,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0953,
"num_tokens": 35420205.0,
"reward": 0.6759413719177246,
"reward_std": 0.47106270790100097,
"rewards/accuracy_reward": 0.25302734375,
"rewards/brier_reward": 0.4121467649936676,
"rewards/confidence_uniqueness_reward": 0.5148928165435791,
"rewards/format_reward": 0.7166015625,
"rewards/frontier_aurc_reward": 0.3006272315979004,
"rewards/frontier_coverage_1": 0.3006272315979004,
"rewards/frontier_coverage_10": 0.3006272315979004,
"rewards/frontier_coverage_15": 0.3006272315979004,
"rewards/frontier_coverage_20": 0.3006272315979004,
"rewards/frontier_coverage_25": 0.3006272315979004,
"rewards/frontier_coverage_5": 0.3006272315979004,
"rewards/frontier_ece_reward": 0.3006272315979004,
"signal/accuracy_reward/centered_abs_mean": 0.262005615234375,
"signal/accuracy_reward/group_std_mean": 0.30882692337036133,
"signal/accuracy_reward/group_zero_std_frac": 0.253125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1310028076171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1310028076171875,
"signal/advantage_abs_mean": 0.3928821861743927,
"signal/advantage_pre_scale_abs_mean": 0.3928821861743927,
"signal/advantage_pre_scale_std": 0.4780964195728302,
"signal/advantage_std": 0.4780964195728302,
"signal/brier_reward/centered_abs_mean": 0.32577033042907716,
"signal/brier_reward/group_std_mean": 0.3727036893367767,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.040721291303634645,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.040721291303634645,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.28297078013420107,
"signal/confidence_uniqueness_reward/group_std_mean": 0.34029907584190366,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035371347516775134,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.035371347516775134,
"signal/format_reward/centered_abs_mean": 0.3770751953125,
"signal/format_reward/group_std_mean": 0.43777642846107484,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.18853759765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.18853759765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2814865827560425,
"signal/frontier_aurc_reward/group_std_mean": 0.33588545918464663,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_1/centered_abs_mean": 0.2814865827560425,
"signal/frontier_coverage_1/group_std_mean": 0.33588545918464663,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_10/centered_abs_mean": 0.2814865827560425,
"signal/frontier_coverage_10/group_std_mean": 0.33588545918464663,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_15/centered_abs_mean": 0.2814865827560425,
"signal/frontier_coverage_15/group_std_mean": 0.33588545918464663,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_20/centered_abs_mean": 0.2814865827560425,
"signal/frontier_coverage_20/group_std_mean": 0.33588545918464663,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_25/centered_abs_mean": 0.2814865827560425,
"signal/frontier_coverage_25/group_std_mean": 0.33588545918464663,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_5/centered_abs_mean": 0.2814865827560425,
"signal/frontier_coverage_5/group_std_mean": 0.33588545918464663,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005038609728217125,
"signal/frontier_ece_reward/centered_abs_mean": 0.2814865827560425,
"signal/frontier_ece_reward/group_std_mean": 0.33588545918464663,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03518582284450531,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03518582284450531,
"step": 10
},
{
"calibration/aurc": 0.6122665169899899,
"calibration/batch_distribution_entropy": 0.6434935543708379,
"calibration/buffer_distribution_entropy": 0.665517862384225,
"calibration/confidence_entropy": 0.3373251428514815,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.48297516178973776,
"calibration/mean_confidence": 0.802906526845207,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0193359375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1476.6,
"completions/mean_length": 208.11865234375,
"completions/mean_terminated_length": 182.09054870605468,
"completions/min_length": 6.0,
"completions/min_terminated_length": 6.0,
"epoch": 0.048,
"grad_norm": 0.0721765011548996,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0643,
"num_tokens": 52600076.0,
"reward": 0.8152373671531677,
"reward_std": 0.36448066532611845,
"rewards/accuracy_reward": 0.3287109375,
"rewards/brier_reward": 0.521061384677887,
"rewards/confidence_uniqueness_reward": 0.627258050441742,
"rewards/format_reward": 0.8685546875,
"rewards/frontier_aurc_reward": 0.29203636273741723,
"rewards/frontier_coverage_1": 0.30767875611782075,
"rewards/frontier_coverage_10": 0.30767875611782075,
"rewards/frontier_coverage_15": 0.30767875611782075,
"rewards/frontier_coverage_20": 0.30767875611782075,
"rewards/frontier_coverage_25": 0.30767875611782075,
"rewards/frontier_coverage_5": 0.30767875611782075,
"rewards/frontier_ece_reward": 0.27833986282348633,
"signal/accuracy_reward/centered_abs_mean": 0.2286376953125,
"signal/accuracy_reward/group_std_mean": 0.2820782124996185,
"signal/accuracy_reward/group_zero_std_frac": 0.275,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11431884765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11431884765625,
"signal/advantage_abs_mean": 0.27974323034286497,
"signal/advantage_pre_scale_abs_mean": 0.27974323034286497,
"signal/advantage_pre_scale_std": 0.3752832055091858,
"signal/advantage_std": 0.3752832055091858,
"signal/brier_reward/centered_abs_mean": 0.28167834877967834,
"signal/brier_reward/group_std_mean": 0.3381074070930481,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03520979359745979,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03520979359745979,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20614430904388428,
"signal/confidence_uniqueness_reward/group_std_mean": 0.2669346034526825,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025768038630485535,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.025768038630485535,
"signal/format_reward/centered_abs_mean": 0.20537109375,
"signal/format_reward/group_std_mean": 0.29972409307956693,
"signal/format_reward/group_zero_std_frac": 0.078125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.102685546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.102685546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.22096354123204948,
"signal/frontier_aurc_reward/group_std_mean": 0.2634817738085985,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0039552472357172515,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0039552472357172515,
"signal/frontier_coverage_1/centered_abs_mean": 0.24031212329864501,
"signal/frontier_coverage_1/group_std_mean": 0.2931499183177948,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_10/centered_abs_mean": 0.24031212329864501,
"signal/frontier_coverage_10/group_std_mean": 0.2931499183177948,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_15/centered_abs_mean": 0.24031212329864501,
"signal/frontier_coverage_15/group_std_mean": 0.2931499183177948,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_20/centered_abs_mean": 0.24031212329864501,
"signal/frontier_coverage_20/group_std_mean": 0.2931499183177948,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_25/centered_abs_mean": 0.24031212329864501,
"signal/frontier_coverage_25/group_std_mean": 0.2931499183177948,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_5/centered_abs_mean": 0.24031212329864501,
"signal/frontier_coverage_5/group_std_mean": 0.2931499183177948,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004301586840301752,
"signal/frontier_ece_reward/centered_abs_mean": 0.24247534871101378,
"signal/frontier_ece_reward/group_std_mean": 0.2915202736854553,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030309418588876723,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030309418588876723,
"step": 15
},
{
"calibration/aurc": 0.5364586742043255,
"calibration/batch_distribution_entropy": 0.7039374947589497,
"calibration/buffer_distribution_entropy": 0.6644280848988587,
"calibration/confidence_entropy": 0.3737305065676649,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.37717855912862763,
"calibration/mean_confidence": 0.7704402478934718,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00419921875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1247.8,
"completions/mean_length": 141.49462890625,
"completions/mean_terminated_length": 135.61878967285156,
"completions/min_length": 14.4,
"completions/min_terminated_length": 14.4,
"epoch": 0.064,
"grad_norm": 0.02302715927362442,
"learning_rate": 1e-06,
"loss": 0.0148,
"num_tokens": 68967381.0,
"reward": 0.8545892715454102,
"reward_std": 0.22467853426933287,
"rewards/accuracy_reward": 0.3982421875,
"rewards/brier_reward": 0.6125121355056763,
"rewards/confidence_uniqueness_reward": 0.7326454758644104,
"rewards/format_reward": 0.97578125,
"rewards/frontier_aurc_reward": -0.007029248867183924,
"rewards/frontier_coverage_1": 0.06456700265407563,
"rewards/frontier_coverage_10": 0.06456700265407563,
"rewards/frontier_coverage_15": 0.06456700265407563,
"rewards/frontier_coverage_20": 0.06456700265407563,
"rewards/frontier_coverage_25": 0.06456700265407563,
"rewards/frontier_coverage_5": 0.06456700265407563,
"rewards/frontier_ece_reward": -0.05900650816038251,
"signal/accuracy_reward/centered_abs_mean": 0.22564697265625,
"signal/accuracy_reward/group_std_mean": 0.27533276081085206,
"signal/accuracy_reward/group_zero_std_frac": 0.30625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.112823486328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.112823486328125,
"signal/advantage_abs_mean": 0.17218832969665526,
"signal/advantage_pre_scale_abs_mean": 0.17218832969665526,
"signal/advantage_pre_scale_std": 0.24152583181858062,
"signal/advantage_std": 0.24152583181858062,
"signal/brier_reward/centered_abs_mean": 0.23915229737758636,
"signal/brier_reward/group_std_mean": 0.2956496119499207,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029894037172198296,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.029894037172198296,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12884613871574402,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1678939491510391,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.016105767339468002,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.016105767339468002,
"signal/format_reward/centered_abs_mean": 0.044775390625,
"signal/format_reward/group_std_mean": 0.10225975811481476,
"signal/format_reward/group_zero_std_frac": 0.51875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0223876953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0223876953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005020592454820871,
"signal/frontier_aurc_reward/group_std_mean": 0.00707492595538497,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.986860339064152e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.986860339064152e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.10616557449102401,
"signal/frontier_coverage_1/group_std_mean": 0.1660414755344391,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_10/centered_abs_mean": 0.10616557449102401,
"signal/frontier_coverage_10/group_std_mean": 0.1660414755344391,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_15/centered_abs_mean": 0.10616557449102401,
"signal/frontier_coverage_15/group_std_mean": 0.1660414755344391,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_20/centered_abs_mean": 0.10616557449102401,
"signal/frontier_coverage_20/group_std_mean": 0.1660414755344391,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_25/centered_abs_mean": 0.10616557449102401,
"signal/frontier_coverage_25/group_std_mean": 0.1660414755344391,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_5/centered_abs_mean": 0.10616557449102401,
"signal/frontier_coverage_5/group_std_mean": 0.1660414755344391,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001900363783352077,
"signal/frontier_ece_reward/centered_abs_mean": 0.13162615597248079,
"signal/frontier_ece_reward/group_std_mean": 0.16141263544559478,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.016453269496560098,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.016453269496560098,
"step": 20
},
{
"calibration/aurc": 0.6185683801548196,
"calibration/batch_distribution_entropy": 0.7741915745085599,
"calibration/buffer_distribution_entropy": 0.6910919502278804,
"calibration/confidence_entropy": 0.44644902052135416,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4119077840049532,
"calibration/mean_confidence": 0.728700664962728,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 804.6,
"completions/mean_length": 121.98232421875,
"completions/mean_terminated_length": 120.46162719726563,
"completions/min_length": 29.6,
"completions/min_terminated_length": 29.6,
"epoch": 0.08,
"grad_norm": 0.03972521796822548,
"learning_rate": 1e-06,
"loss": 0.0022,
"num_tokens": 85149632.0,
"reward": 0.8964222550392151,
"reward_std": 0.1864424616098404,
"rewards/accuracy_reward": 0.43291015625,
"rewards/brier_reward": 0.6608709096908569,
"rewards/confidence_uniqueness_reward": 0.7922413229942322,
"rewards/format_reward": 0.99423828125,
"rewards/frontier_aurc_reward": -0.0059954837895929815,
"rewards/frontier_coverage_1": 0.06537417620420456,
"rewards/frontier_coverage_10": 0.06537417620420456,
"rewards/frontier_coverage_15": 0.06537417620420456,
"rewards/frontier_coverage_20": 0.06537417620420456,
"rewards/frontier_coverage_25": 0.06537417620420456,
"rewards/frontier_coverage_5": 0.06537417620420456,
"rewards/frontier_ece_reward": -0.04563892595469952,
"signal/accuracy_reward/centered_abs_mean": 0.212396240234375,
"signal/accuracy_reward/group_std_mean": 0.2677969515323639,
"signal/accuracy_reward/group_zero_std_frac": 0.28125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1061981201171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1061981201171875,
"signal/advantage_abs_mean": 0.1454618066549301,
"signal/advantage_pre_scale_abs_mean": 0.1454618066549301,
"signal/advantage_pre_scale_std": 0.20510812401771544,
"signal/advantage_std": 0.20510812401771544,
"signal/brier_reward/centered_abs_mean": 0.2104009658098221,
"signal/brier_reward/group_std_mean": 0.2618310570716858,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02630012072622776,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02630012072622776,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08538121879100799,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11254773437976837,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010672652348875999,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010672652348875999,
"signal/format_reward/centered_abs_mean": 0.011029052734375,
"signal/format_reward/group_std_mean": 0.029448001086711882,
"signal/format_reward/group_zero_std_frac": 0.84375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0055145263671875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0055145263671875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036648384761065246,
"signal/frontier_aurc_reward/group_std_mean": 0.005378965474665165,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.560060792253353e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.560060792253353e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12216156721115112,
"signal/frontier_coverage_1/group_std_mean": 0.1854743927717209,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_10/centered_abs_mean": 0.12216156721115112,
"signal/frontier_coverage_10/group_std_mean": 0.1854743927717209,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_15/centered_abs_mean": 0.12216156721115112,
"signal/frontier_coverage_15/group_std_mean": 0.1854743927717209,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_20/centered_abs_mean": 0.12216156721115112,
"signal/frontier_coverage_20/group_std_mean": 0.1854743927717209,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_25/centered_abs_mean": 0.12216156721115112,
"signal/frontier_coverage_25/group_std_mean": 0.1854743927717209,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_5/centered_abs_mean": 0.12216156721115112,
"signal/frontier_coverage_5/group_std_mean": 0.1854743927717209,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021866919472813607,
"signal/frontier_ece_reward/centered_abs_mean": 0.11495534181594849,
"signal/frontier_ece_reward/group_std_mean": 0.14088748395442963,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014369417726993561,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014369417726993561,
"step": 25
},
{
"calibration/aurc": 0.6111911956970324,
"calibration/batch_distribution_entropy": 0.8338461630160652,
"calibration/buffer_distribution_entropy": 0.724930626699906,
"calibration/confidence_entropy": 0.5217741949310019,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3064762667859151,
"calibration/mean_confidence": 0.6442235468220104,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 1422.8,
"completions/max_terminated_length": 868.2,
"completions/mean_length": 122.53857421875,
"completions/mean_terminated_length": 121.71004791259766,
"completions/min_length": 42.0,
"completions/min_terminated_length": 42.0,
"epoch": 0.096,
"grad_norm": 0.004610727075487375,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 101449035.0,
"reward": 0.9245005130767823,
"reward_std": 0.16271952390670777,
"rewards/accuracy_reward": 0.46552734375,
"rewards/brier_reward": 0.7045912981033325,
"rewards/confidence_uniqueness_reward": 0.8080425143241883,
"rewards/format_reward": 0.99658203125,
"rewards/frontier_aurc_reward": -0.005250969249755144,
"rewards/frontier_coverage_1": 0.07289079874753952,
"rewards/frontier_coverage_10": 0.07289079874753952,
"rewards/frontier_coverage_15": 0.07289079874753952,
"rewards/frontier_coverage_20": 0.07289079874753952,
"rewards/frontier_coverage_25": 0.07289079874753952,
"rewards/frontier_coverage_5": 0.07289079874753952,
"rewards/frontier_ece_reward": -0.026943267788738012,
"signal/accuracy_reward/centered_abs_mean": 0.185174560546875,
"signal/accuracy_reward/group_std_mean": 0.23991808891296387,
"signal/accuracy_reward/group_zero_std_frac": 0.334375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0925872802734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0925872802734375,
"signal/advantage_abs_mean": 0.12463378012180329,
"signal/advantage_pre_scale_abs_mean": 0.12463378012180329,
"signal/advantage_pre_scale_std": 0.1815657287836075,
"signal/advantage_std": 0.1815657287836075,
"signal/brier_reward/centered_abs_mean": 0.18598549365997313,
"signal/brier_reward/group_std_mean": 0.2356630265712738,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02324818670749664,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02324818670749664,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08923951834440232,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11270735561847686,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01115493979305029,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01115493979305029,
"signal/format_reward/centered_abs_mean": 0.006573486328125,
"signal/format_reward/group_std_mean": 0.017989716865122317,
"signal/format_reward/group_zero_std_frac": 0.903125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0032867431640625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0032867431640625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00277226809412241,
"signal/frontier_aurc_reward/group_std_mean": 0.004334397334605456,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.962359598721377e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.962359598721377e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14637718200683594,
"signal/frontier_coverage_1/group_std_mean": 0.20774976015090943,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_10/centered_abs_mean": 0.14637718200683594,
"signal/frontier_coverage_10/group_std_mean": 0.20774976015090943,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_15/centered_abs_mean": 0.14637718200683594,
"signal/frontier_coverage_15/group_std_mean": 0.20774976015090943,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_20/centered_abs_mean": 0.14637718200683594,
"signal/frontier_coverage_20/group_std_mean": 0.20774976015090943,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_25/centered_abs_mean": 0.14637718200683594,
"signal/frontier_coverage_25/group_std_mean": 0.20774976015090943,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_5/centered_abs_mean": 0.14637718200683594,
"signal/frontier_coverage_5/group_std_mean": 0.20774976015090943,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026201514527201654,
"signal/frontier_ece_reward/centered_abs_mean": 0.09951903373003006,
"signal/frontier_ece_reward/group_std_mean": 0.12258590757846832,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012439879216253757,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012439879216253757,
"step": 30
},
{
"calibration/aurc": 0.47581321016978,
"calibration/batch_distribution_entropy": 0.8613389118246927,
"calibration/buffer_distribution_entropy": 0.7676086231549225,
"calibration/confidence_entropy": 0.5509334290904493,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.00821917808219178,
"calibration/coverage@25%": 0.01643835616438356,
"calibration/coverage@30%": 0.028180039138943246,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.17883227267157914,
"calibration/mean_confidence": 0.5884126079190704,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 690.6,
"completions/max_terminated_length": 476.2,
"completions/mean_length": 130.32890625,
"completions/mean_terminated_length": 130.19188842773437,
"completions/min_length": 43.2,
"completions/min_terminated_length": 43.2,
"epoch": 0.112,
"grad_norm": 0.008051756769418716,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 117893075.0,
"reward": 0.9516644597053527,
"reward_std": 0.1442788153886795,
"rewards/accuracy_reward": 0.49208984375,
"rewards/brier_reward": 0.7432172417640686,
"rewards/confidence_uniqueness_reward": 0.8300640344619751,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_aurc_reward": -0.004443310108035803,
"rewards/frontier_coverage_1": 0.09440049231052398,
"rewards/frontier_coverage_10": 0.09440049231052398,
"rewards/frontier_coverage_15": 0.09440049231052398,
"rewards/frontier_coverage_20": 0.09440049231052398,
"rewards/frontier_coverage_25": 0.09440049231052398,
"rewards/frontier_coverage_5": 0.09440049231052398,
"rewards/frontier_ece_reward": -0.0033288702834397554,
"signal/accuracy_reward/centered_abs_mean": 0.176458740234375,
"signal/accuracy_reward/group_std_mean": 0.23186054825782776,
"signal/accuracy_reward/group_zero_std_frac": 0.35,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0882293701171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0882293701171875,
"signal/advantage_abs_mean": 0.11125468015670777,
"signal/advantage_pre_scale_abs_mean": 0.11125468015670777,
"signal/advantage_pre_scale_std": 0.16140751540660858,
"signal/advantage_std": 0.16140751540660858,
"signal/brier_reward/centered_abs_mean": 0.16677133738994598,
"signal/brier_reward/group_std_mean": 0.21192941665649415,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020846417173743248,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020846417173743248,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09038377106189728,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11349603980779648,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01129797138273716,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01129797138273716,
"signal/format_reward/centered_abs_mean": 0.00263671875,
"signal/format_reward/group_std_mean": 0.007397671649232507,
"signal/format_reward/group_zero_std_frac": 0.959375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001318359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001318359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019801823887974023,
"signal/frontier_aurc_reward/group_std_mean": 0.0030699548777192833,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5445262619759886e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5445262619759886e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1819765478372574,
"signal/frontier_coverage_1/group_std_mean": 0.24308145940303802,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_10/centered_abs_mean": 0.1819765478372574,
"signal/frontier_coverage_10/group_std_mean": 0.24308145940303802,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_15/centered_abs_mean": 0.1819765478372574,
"signal/frontier_coverage_15/group_std_mean": 0.24308145940303802,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_20/centered_abs_mean": 0.1819765478372574,
"signal/frontier_coverage_20/group_std_mean": 0.24308145940303802,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_25/centered_abs_mean": 0.1819765478372574,
"signal/frontier_coverage_25/group_std_mean": 0.24308145940303802,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_5/centered_abs_mean": 0.1819765478372574,
"signal/frontier_coverage_5/group_std_mean": 0.24308145940303802,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032573801465332506,
"signal/frontier_ece_reward/centered_abs_mean": 0.08115999251604081,
"signal/frontier_ece_reward/group_std_mean": 0.10086136162281037,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010144999064505101,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010144999064505101,
"step": 35
},
{
"calibration/aurc": 0.500457568524228,
"calibration/batch_distribution_entropy": 0.8839443566974549,
"calibration/buffer_distribution_entropy": 0.8121122106093612,
"calibration/confidence_entropy": 0.5729897651788818,
"calibration/coverage@0%": 0.0023468137254901962,
"calibration/coverage@1%": 0.0023468137254901962,
"calibration/coverage@10%": 0.0023468137254901962,
"calibration/coverage@15%": 0.0023468137254901962,
"calibration/coverage@20%": 0.015246620908637426,
"calibration/coverage@25%": 0.016027870908637425,
"calibration/coverage@30%": 0.04967818891255132,
"calibration/coverage@5%": 0.0023468137254901962,
"calibration/ece": 0.1492628622651326,
"calibration/mean_confidence": 0.4656056361196665,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 742.6,
"completions/max_terminated_length": 503.8,
"completions/mean_length": 142.25283203125,
"completions/mean_terminated_length": 142.11691589355468,
"completions/min_length": 53.4,
"completions/min_terminated_length": 53.4,
"epoch": 0.128,
"grad_norm": 0.004143570549786091,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 134266416.0,
"reward": 0.9478516936302185,
"reward_std": 0.1235954999923706,
"rewards/accuracy_reward": 0.470703125,
"rewards/brier_reward": 0.7474552869796753,
"rewards/confidence_uniqueness_reward": 0.8561806559562684,
"rewards/format_reward": 0.99873046875,
"rewards/frontier_aurc_reward": -0.0040137280710041525,
"rewards/frontier_coverage_1": 0.11203746348619462,
"rewards/frontier_coverage_10": 0.11203746348619462,
"rewards/frontier_coverage_15": 0.11203746348619462,
"rewards/frontier_coverage_20": 0.11203746348619462,
"rewards/frontier_coverage_25": 0.11203746348619462,
"rewards/frontier_coverage_5": 0.11203746348619462,
"rewards/frontier_ece_reward": 0.005755350179970265,
"signal/accuracy_reward/centered_abs_mean": 0.156494140625,
"signal/accuracy_reward/group_std_mean": 0.20581283569335937,
"signal/accuracy_reward/group_zero_std_frac": 0.415625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0782470703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0782470703125,
"signal/advantage_abs_mean": 0.09602416306734085,
"signal/advantage_pre_scale_abs_mean": 0.09602416306734085,
"signal/advantage_pre_scale_std": 0.1393231213092804,
"signal/advantage_std": 0.1393231213092804,
"signal/brier_reward/centered_abs_mean": 0.16182340383529664,
"signal/brier_reward/group_std_mean": 0.20294124484062195,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02022792547941208,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02022792547941208,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07690812945365906,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0960095539689064,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009613516181707383,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009613516181707383,
"signal/format_reward/centered_abs_mean": 0.002447509765625,
"signal/format_reward/group_std_mean": 0.00684524467214942,
"signal/format_reward/group_zero_std_frac": 0.9625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012237548828125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0012237548828125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013813054421916604,
"signal/frontier_aurc_reward/group_std_mean": 0.002145401481539011,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4725366165512243e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4725366165512243e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21806600689888,
"signal/frontier_coverage_1/group_std_mean": 0.2785297632217407,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_10/centered_abs_mean": 0.21806600689888,
"signal/frontier_coverage_10/group_std_mean": 0.2785297632217407,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_15/centered_abs_mean": 0.21806600689888,
"signal/frontier_coverage_15/group_std_mean": 0.2785297632217407,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_20/centered_abs_mean": 0.21806600689888,
"signal/frontier_coverage_20/group_std_mean": 0.2785297632217407,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_25/centered_abs_mean": 0.21806600689888,
"signal/frontier_coverage_25/group_std_mean": 0.2785297632217407,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_5/centered_abs_mean": 0.21806600689888,
"signal/frontier_coverage_5/group_std_mean": 0.2785297632217407,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039033814333379268,
"signal/frontier_ece_reward/centered_abs_mean": 0.05941944047808647,
"signal/frontier_ece_reward/group_std_mean": 0.07643859535455703,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007427430059760809,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007427430059760809,
"step": 40
},
{
"calibration/aurc": 0.3110369117140962,
"calibration/batch_distribution_entropy": 0.9074994904593658,
"calibration/buffer_distribution_entropy": 0.8566246693752007,
"calibration/confidence_entropy": 0.5313453136877156,
"calibration/coverage@0%": 0.00078125,
"calibration/coverage@1%": 0.00078125,
"calibration/coverage@10%": 0.03671875,
"calibration/coverage@15%": 0.11328125,
"calibration/coverage@20%": 0.224609375,
"calibration/coverage@25%": 0.3406441108121331,
"calibration/coverage@30%": 0.4304878608121331,
"calibration/coverage@5%": 0.00078125,
"calibration/ece": 0.23118559873124087,
"calibration/mean_confidence": 0.39543612093607816,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1101.4,
"completions/max_terminated_length": 424.2,
"completions/mean_length": 146.07265625,
"completions/mean_terminated_length": 145.52989501953124,
"completions/min_length": 47.4,
"completions/min_terminated_length": 47.4,
"epoch": 0.144,
"grad_norm": 0.0035824107471853495,
"learning_rate": 1e-06,
"loss": 0.0015,
"num_tokens": 150712632.0,
"reward": 0.9979648113250732,
"reward_std": 0.11293403208255767,
"rewards/accuracy_reward": 0.5818359375,
"rewards/brier_reward": 0.7256381988525391,
"rewards/confidence_uniqueness_reward": 0.8713930606842041,
"rewards/format_reward": 0.99853515625,
"rewards/frontier_aurc_reward": -0.0034363477025181055,
"rewards/frontier_coverage_1": 0.057438090443611145,
"rewards/frontier_coverage_10": 0.057438090443611145,
"rewards/frontier_coverage_15": 0.057438090443611145,
"rewards/frontier_coverage_20": 0.057438090443611145,
"rewards/frontier_coverage_25": 0.057438090443611145,
"rewards/frontier_coverage_5": 0.057438090443611145,
"rewards/frontier_ece_reward": 0.016344105079770088,
"signal/accuracy_reward/centered_abs_mean": 0.156396484375,
"signal/accuracy_reward/group_std_mean": 0.2017151564359665,
"signal/accuracy_reward/group_zero_std_frac": 0.434375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0781982421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0781982421875,
"signal/advantage_abs_mean": 0.0892390176653862,
"signal/advantage_pre_scale_abs_mean": 0.0892390176653862,
"signal/advantage_pre_scale_std": 0.12830377966165543,
"signal/advantage_std": 0.12830377966165543,
"signal/brier_reward/centered_abs_mean": 0.17482829093933105,
"signal/brier_reward/group_std_mean": 0.21727988123893738,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021853536367416382,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021853536367416382,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06832201182842254,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08698472678661347,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008540251478552818,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008540251478552818,
"signal/format_reward/centered_abs_mean": 0.002496337890625,
"signal/format_reward/group_std_mean": 0.005241806851699948,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012481689453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0012481689453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013276722747832537,
"signal/frontier_aurc_reward/group_std_mean": 0.002048111497424543,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3765333025949076e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3765333025949076e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.25025501251220705,
"signal/frontier_coverage_1/group_std_mean": 0.3121617794036865,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_10/centered_abs_mean": 0.25025501251220705,
"signal/frontier_coverage_10/group_std_mean": 0.3121617794036865,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_15/centered_abs_mean": 0.25025501251220705,
"signal/frontier_coverage_15/group_std_mean": 0.3121617794036865,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_20/centered_abs_mean": 0.25025501251220705,
"signal/frontier_coverage_20/group_std_mean": 0.3121617794036865,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_25/centered_abs_mean": 0.25025501251220705,
"signal/frontier_coverage_25/group_std_mean": 0.3121617794036865,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_5/centered_abs_mean": 0.25025501251220705,
"signal/frontier_coverage_5/group_std_mean": 0.3121617794036865,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044795645400881766,
"signal/frontier_ece_reward/centered_abs_mean": 0.047294650226831436,
"signal/frontier_ece_reward/group_std_mean": 0.06447840631008148,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0059118312783539295,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0059118312783539295,
"step": 45
},
{
"calibration/aurc": 0.38340311213282363,
"calibration/batch_distribution_entropy": 0.9228495953097084,
"calibration/buffer_distribution_entropy": 0.8920677169770166,
"calibration/confidence_entropy": 0.5123542013682538,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.017993211839530333,
"calibration/coverage@15%": 0.05277336105675147,
"calibration/coverage@20%": 0.08248608732876712,
"calibration/coverage@25%": 0.15869465508806263,
"calibration/coverage@30%": 0.28567453522504893,
"calibration/coverage@5%": 0.00546875,
"calibration/ece": 0.12188082602439854,
"calibration/mean_confidence": 0.3888530788440171,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 870.8,
"completions/max_terminated_length": 385.8,
"completions/mean_length": 149.441015625,
"completions/mean_terminated_length": 149.17046203613282,
"completions/min_length": 55.8,
"completions/min_terminated_length": 55.8,
"epoch": 0.16,
"grad_norm": 0.00288645108230412,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 167263836.0,
"reward": 0.9865847229957581,
"reward_std": 0.11459582149982453,
"rewards/accuracy_reward": 0.537890625,
"rewards/brier_reward": 0.7459044456481934,
"rewards/confidence_uniqueness_reward": 0.8809366464614868,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.0034455065149813892,
"rewards/frontier_coverage_1": 0.11904234737157822,
"rewards/frontier_coverage_10": 0.11904234737157822,
"rewards/frontier_coverage_15": 0.11904234737157822,
"rewards/frontier_coverage_20": 0.11904234737157822,
"rewards/frontier_coverage_25": 0.11904234737157822,
"rewards/frontier_coverage_5": 0.11904234737157822,
"rewards/frontier_ece_reward": 0.016392653435468675,
"signal/accuracy_reward/centered_abs_mean": 0.1531494140625,
"signal/accuracy_reward/group_std_mean": 0.20196100175380707,
"signal/accuracy_reward/group_zero_std_frac": 0.425,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07657470703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07657470703125,
"signal/advantage_abs_mean": 0.08890287727117538,
"signal/advantage_pre_scale_abs_mean": 0.08890287727117538,
"signal/advantage_pre_scale_std": 0.12889028787612916,
"signal/advantage_std": 0.12889028787612916,
"signal/brier_reward/centered_abs_mean": 0.17595805823802949,
"signal/brier_reward/group_std_mean": 0.2205444246530533,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021994757279753686,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021994757279753686,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06260188668966293,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08365204632282257,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007825235836207867,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007825235836207867,
"signal/format_reward/centered_abs_mean": 0.0018798828125,
"signal/format_reward/group_std_mean": 0.0051879632286727425,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00156771473120898,
"signal/frontier_aurc_reward/group_std_mean": 0.0023965310771018266,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8062092314939947e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8062092314939947e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2516119539737701,
"signal/frontier_coverage_1/group_std_mean": 0.31743831038475034,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_10/centered_abs_mean": 0.2516119539737701,
"signal/frontier_coverage_10/group_std_mean": 0.31743831038475034,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_15/centered_abs_mean": 0.2516119539737701,
"signal/frontier_coverage_15/group_std_mean": 0.31743831038475034,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_20/centered_abs_mean": 0.2516119539737701,
"signal/frontier_coverage_20/group_std_mean": 0.31743831038475034,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_25/centered_abs_mean": 0.2516119539737701,
"signal/frontier_coverage_25/group_std_mean": 0.31743831038475034,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_5/centered_abs_mean": 0.2516119539737701,
"signal/frontier_coverage_5/group_std_mean": 0.31743831038475034,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004503853805363178,
"signal/frontier_ece_reward/centered_abs_mean": 0.04847268611192703,
"signal/frontier_ece_reward/group_std_mean": 0.06642893105745315,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006059085763990879,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006059085763990879,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.581942546964942,
"eval_calibration/batch_distribution_entropy": 0.8694379702142905,
"eval_calibration/buffer_distribution_entropy": 0.9069787191727915,
"eval_calibration/confidence_entropy": 0.5197218388047841,
"eval_calibration/coverage@0%": 0.008064516129032258,
"eval_calibration/coverage@1%": 0.008064516129032258,
"eval_calibration/coverage@10%": 0.09400201612903225,
"eval_calibration/coverage@15%": 0.09400201612903225,
"eval_calibration/coverage@20%": 0.13306451612903225,
"eval_calibration/coverage@25%": 0.16507056451612903,
"eval_calibration/coverage@30%": 0.18850806451612903,
"eval_calibration/coverage@5%": 0.008064516129032258,
"eval_calibration/ece": 0.23922127016129036,
"eval_calibration/mean_confidence": 0.4247958669354839,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 596.75,
"eval_completions/max_terminated_length": 282.5,
"eval_completions/mean_length": 156.9091453552246,
"eval_completions/mean_terminated_length": 154.21026611328125,
"eval_completions/min_length": 81.75,
"eval_completions/min_terminated_length": 81.75,
"eval_loss": 0.0,
"eval_num_tokens": 167263836.0,
"eval_reward": 0.9125984758138657,
"eval_reward_std": 0.21672571077942848,
"eval_rewards/accuracy_reward": 0.37109375,
"eval_rewards/brier_reward": 0.7805570214986801,
"eval_rewards/confidence_uniqueness_reward": 0.8489478528499603,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.0037371510406956077,
"eval_rewards/frontier_coverage_1": 0.20891405642032623,
"eval_rewards/frontier_coverage_10": 0.20891405642032623,
"eval_rewards/frontier_coverage_15": 0.20891405642032623,
"eval_rewards/frontier_coverage_20": 0.20891405642032623,
"eval_rewards/frontier_coverage_25": 0.20891405642032623,
"eval_rewards/frontier_coverage_5": 0.20891405642032623,
"eval_rewards/frontier_ece_reward": 0.01575645850971341,
"eval_runtime": 25.9605,
"eval_samples_per_second": 19.26,
"eval_signal/accuracy_reward/centered_abs_mean": 0.453125,
"eval_signal/accuracy_reward/group_std_mean": 0.48237285763025284,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2265625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2265625,
"eval_signal/advantage_abs_mean": 0.19144192337989807,
"eval_signal/advantage_pre_scale_abs_mean": 0.19144192337989807,
"eval_signal/advantage_pre_scale_std": 0.21519476547837257,
"eval_signal/advantage_std": 0.21519476547837257,
"eval_signal/brier_reward/centered_abs_mean": 0.1979050487279892,
"eval_signal/brier_reward/group_std_mean": 0.24851922690868378,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02473813109099865,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02473813109099865,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06338747031986713,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07939925417304039,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007923433789983392,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007923433789983392,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00245083641493693,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004007689480204135,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.386997170513496e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.386997170513496e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.39275022596120834,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4746975228190422,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.39275022596120834,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4746975228190422,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.39275022596120834,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4746975228190422,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.39275022596120834,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4746975228190422,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.39275022596120834,
"eval_signal/frontier_coverage_25/group_std_mean": 0.4746975228190422,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.39275022596120834,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4746975228190422,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007030228851363063,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05723079666495323,
"eval_signal/frontier_ece_reward/group_std_mean": 0.08887772634625435,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007153849583119154,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007153849583119154,
"eval_steps_per_second": 0.154,
"step": 50
},
{
"calibration/aurc": 0.41148073571425414,
"calibration/batch_distribution_entropy": 0.9665159815171023,
"calibration/buffer_distribution_entropy": 0.9138723263746295,
"calibration/confidence_entropy": 0.49411856057329295,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0265625,
"calibration/coverage@20%": 0.0421875,
"calibration/coverage@25%": 0.07421875,
"calibration/coverage@30%": 0.173828125,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.16889198610565095,
"calibration/mean_confidence": 0.4860307061398975,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 455.4,
"completions/max_terminated_length": 455.4,
"completions/mean_length": 155.9359375,
"completions/mean_terminated_length": 155.9359375,
"completions/min_length": 61.0,
"completions/min_terminated_length": 61.0,
"epoch": 0.176,
"grad_norm": 0.00218537007458508,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 184097740.0,
"reward": 0.9889800906181335,
"reward_std": 0.11108050644397735,
"rewards/accuracy_reward": 0.53056640625,
"rewards/brier_reward": 0.7610553503036499,
"rewards/confidence_uniqueness_reward": 0.8953521609306335,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0036368840374052525,
"rewards/frontier_coverage_1": 0.13700859993696213,
"rewards/frontier_coverage_10": 0.13700859993696213,
"rewards/frontier_coverage_15": 0.13700859993696213,
"rewards/frontier_coverage_20": 0.13700859993696213,
"rewards/frontier_coverage_25": 0.13700859993696213,
"rewards/frontier_coverage_5": 0.13700859993696213,
"rewards/frontier_ece_reward": 0.018704849109053612,
"signal/accuracy_reward/centered_abs_mean": 0.137249755859375,
"signal/accuracy_reward/group_std_mean": 0.18366769552230836,
"signal/accuracy_reward/group_zero_std_frac": 0.465625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686248779296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0686248779296875,
"signal/advantage_abs_mean": 0.084813691675663,
"signal/advantage_pre_scale_abs_mean": 0.084813691675663,
"signal/advantage_pre_scale_std": 0.12855230122804642,
"signal/advantage_std": 0.12855230122804642,
"signal/brier_reward/centered_abs_mean": 0.1753113955259323,
"signal/brier_reward/group_std_mean": 0.22086445689201356,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02191392444074154,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02191392444074154,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.046444494277238846,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05864086300134659,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005805561784654856,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005805561784654856,
"signal/format_reward/centered_abs_mean": 0.001312255859375,
"signal/format_reward/group_std_mean": 0.0035306816454976795,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023333648685365917,
"signal/frontier_aurc_reward/group_std_mean": 0.003552594967186451,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.176723159616813e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.176723159616813e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2309940814971924,
"signal/frontier_coverage_1/group_std_mean": 0.2958366394042969,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_10/centered_abs_mean": 0.2309940814971924,
"signal/frontier_coverage_10/group_std_mean": 0.2958366394042969,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_15/centered_abs_mean": 0.2309940814971924,
"signal/frontier_coverage_15/group_std_mean": 0.2958366394042969,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_20/centered_abs_mean": 0.2309940814971924,
"signal/frontier_coverage_20/group_std_mean": 0.2958366394042969,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_25/centered_abs_mean": 0.2309940814971924,
"signal/frontier_coverage_25/group_std_mean": 0.2958366394042969,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_5/centered_abs_mean": 0.2309940814971924,
"signal/frontier_coverage_5/group_std_mean": 0.2958366394042969,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004134793765842915,
"signal/frontier_ece_reward/centered_abs_mean": 0.056712330877780916,
"signal/frontier_ece_reward/group_std_mean": 0.07508723661303521,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0070890413597226145,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0070890413597226145,
"step": 55
},
{
"calibration/aurc": 0.35110534394587606,
"calibration/batch_distribution_entropy": 0.9429500544063417,
"calibration/buffer_distribution_entropy": 0.9228942481105176,
"calibration/confidence_entropy": 0.44029570897924264,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0410958904109589,
"calibration/coverage@15%": 0.06105675146771037,
"calibration/coverage@20%": 0.11779216609589041,
"calibration/coverage@25%": 0.133446978962818,
"calibration/coverage@30%": 0.3190290178571429,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1277301548740215,
"calibration/mean_confidence": 0.5812738931017611,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1073.6,
"completions/max_terminated_length": 403.4,
"completions/mean_length": 157.32744140625,
"completions/mean_terminated_length": 156.7888427734375,
"completions/min_length": 55.2,
"completions/min_terminated_length": 55.2,
"epoch": 0.192,
"grad_norm": 0.0026588267646729946,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 200523589.0,
"reward": 0.9892240047454834,
"reward_std": 0.11632921844720841,
"rewards/accuracy_reward": 0.5302734375,
"rewards/brier_reward": 0.7743942737579346,
"rewards/confidence_uniqueness_reward": 0.8843107342720031,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.0036770137492567303,
"rewards/frontier_coverage_1": 0.1299908846616745,
"rewards/frontier_coverage_10": 0.1299908846616745,
"rewards/frontier_coverage_15": 0.1299908846616745,
"rewards/frontier_coverage_20": 0.1299908846616745,
"rewards/frontier_coverage_25": 0.1299908846616745,
"rewards/frontier_coverage_5": 0.1299908846616745,
"rewards/frontier_ece_reward": 0.026738068088889122,
"signal/accuracy_reward/centered_abs_mean": 0.1340087890625,
"signal/accuracy_reward/group_std_mean": 0.17480367571115493,
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06700439453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06700439453125,
"signal/advantage_abs_mean": 0.08978550434112549,
"signal/advantage_pre_scale_abs_mean": 0.08978550434112549,
"signal/advantage_pre_scale_std": 0.1373380169272423,
"signal/advantage_std": 0.1373380169272423,
"signal/brier_reward/centered_abs_mean": 0.17695107460021972,
"signal/brier_reward/group_std_mean": 0.22387023866176606,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022118884325027465,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022118884325027465,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.060887254774570465,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07694388255476951,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007610906846821308,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007610906846821308,
"signal/format_reward/centered_abs_mean": 0.00189208984375,
"signal/format_reward/group_std_mean": 0.00552427158690989,
"signal/format_reward/group_zero_std_frac": 0.96875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032003792934119702,
"signal/frontier_aurc_reward/group_std_mean": 0.004674886912107467,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7286787341581655e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7286787341581655e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19972616136074067,
"signal/frontier_coverage_1/group_std_mean": 0.25967652797698976,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_10/centered_abs_mean": 0.19972616136074067,
"signal/frontier_coverage_10/group_std_mean": 0.25967652797698976,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_15/centered_abs_mean": 0.19972616136074067,
"signal/frontier_coverage_15/group_std_mean": 0.25967652797698976,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_20/centered_abs_mean": 0.19972616136074067,
"signal/frontier_coverage_20/group_std_mean": 0.25967652797698976,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_25/centered_abs_mean": 0.19972616136074067,
"signal/frontier_coverage_25/group_std_mean": 0.25967652797698976,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_5/centered_abs_mean": 0.19972616136074067,
"signal/frontier_coverage_5/group_std_mean": 0.25967652797698976,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003575098142027855,
"signal/frontier_ece_reward/centered_abs_mean": 0.06527650877833366,
"signal/frontier_ece_reward/group_std_mean": 0.08204113095998763,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008159563597291707,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008159563597291707,
"step": 60
},
{
"calibration/aurc": 0.29950539905285317,
"calibration/batch_distribution_entropy": 0.8949305749179007,
"calibration/buffer_distribution_entropy": 0.9254220054486639,
"calibration/confidence_entropy": 0.3947662440191003,
"calibration/coverage@0%": 0.011328125,
"calibration/coverage@1%": 0.011328125,
"calibration/coverage@10%": 0.11328125,
"calibration/coverage@15%": 0.155078125,
"calibration/coverage@20%": 0.29765625,
"calibration/coverage@25%": 0.43984375,
"calibration/coverage@30%": 0.565625,
"calibration/coverage@5%": 0.01953125,
"calibration/ece": 0.14129778085249506,
"calibration/mean_confidence": 0.6134717121758806,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 625.0,
"completions/max_terminated_length": 445.8,
"completions/mean_length": 157.33720703125,
"completions/mean_terminated_length": 157.20322265625,
"completions/min_length": 53.6,
"completions/min_terminated_length": 53.6,
"epoch": 0.208,
"grad_norm": 0.006198456976562738,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 217166946.0,
"reward": 1.0055761933326721,
"reward_std": 0.12328730970621109,
"rewards/accuracy_reward": 0.56953125,
"rewards/brier_reward": 0.7748962879180908,
"rewards/confidence_uniqueness_reward": 0.8710411190986633,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.0033608878031373022,
"rewards/frontier_coverage_1": 0.10492202192544937,
"rewards/frontier_coverage_10": 0.10492202192544937,
"rewards/frontier_coverage_15": 0.10492202192544937,
"rewards/frontier_coverage_20": 0.10492202192544937,
"rewards/frontier_coverage_25": 0.10492202192544937,
"rewards/frontier_coverage_5": 0.10492202192544937,
"rewards/frontier_ece_reward": 0.034004238247871396,
"signal/accuracy_reward/centered_abs_mean": 0.13963623046875,
"signal/accuracy_reward/group_std_mean": 0.186881947517395,
"signal/accuracy_reward/group_zero_std_frac": 0.459375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.069818115234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.069818115234375,
"signal/advantage_abs_mean": 0.09355712085962295,
"signal/advantage_pre_scale_abs_mean": 0.09355712085962295,
"signal/advantage_pre_scale_std": 0.14598494470119477,
"signal/advantage_std": 0.14598494470119477,
"signal/brier_reward/centered_abs_mean": 0.18199315667152405,
"signal/brier_reward/group_std_mean": 0.2308054745197296,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022749144583940506,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022749144583940506,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08029745370149613,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10032221227884293,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010037181712687016,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010037181712687016,
"signal/format_reward/centered_abs_mean": 0.00150146484375,
"signal/format_reward/group_std_mean": 0.004083108762279153,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000750732421875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036689775064587594,
"signal/frontier_aurc_reward/group_std_mean": 0.0054342994466423985,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.567469317815266e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.567469317815266e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18830927908420564,
"signal/frontier_coverage_1/group_std_mean": 0.25073177814483644,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_10/centered_abs_mean": 0.18830927908420564,
"signal/frontier_coverage_10/group_std_mean": 0.25073177814483644,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_15/centered_abs_mean": 0.18830927908420564,
"signal/frontier_coverage_15/group_std_mean": 0.25073177814483644,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_20/centered_abs_mean": 0.18830927908420564,
"signal/frontier_coverage_20/group_std_mean": 0.25073177814483644,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_25/centered_abs_mean": 0.18830927908420564,
"signal/frontier_coverage_25/group_std_mean": 0.25073177814483644,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_5/centered_abs_mean": 0.18830927908420564,
"signal/frontier_coverage_5/group_std_mean": 0.25073177814483644,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00337073584087193,
"signal/frontier_ece_reward/centered_abs_mean": 0.06522954106330872,
"signal/frontier_ece_reward/group_std_mean": 0.08197070807218551,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00815369263291359,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00815369263291359,
"step": 65
},
{
"calibration/aurc": 0.32836925315578147,
"calibration/batch_distribution_entropy": 0.9369705991630376,
"calibration/buffer_distribution_entropy": 0.9288580259601347,
"calibration/confidence_entropy": 0.39559583362140976,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.08088307240704501,
"calibration/coverage@15%": 0.16330494740704501,
"calibration/coverage@20%": 0.30007262108610566,
"calibration/coverage@25%": 0.45020104574363995,
"calibration/coverage@30%": 0.5385235750978474,
"calibration/coverage@5%": 0.00859375,
"calibration/ece": 0.15778186841818104,
"calibration/mean_confidence": 0.5168032271358796,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 1333.4,
"completions/max_terminated_length": 457.4,
"completions/mean_length": 155.1294921875,
"completions/mean_terminated_length": 154.18482055664063,
"completions/min_length": 69.2,
"completions/min_terminated_length": 69.2,
"epoch": 0.224,
"grad_norm": 0.0021975021809339523,
"learning_rate": 1e-06,
"loss": 0.0019,
"num_tokens": 233908656.0,
"reward": 0.9999201774597168,
"reward_std": 0.11371375173330307,
"rewards/accuracy_reward": 0.54541015625,
"rewards/brier_reward": 0.7730230927467346,
"rewards/confidence_uniqueness_reward": 0.8957045555114747,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0036701176781207324,
"rewards/frontier_coverage_1": 0.14951011687517166,
"rewards/frontier_coverage_10": 0.14951011687517166,
"rewards/frontier_coverage_15": 0.14951011687517166,
"rewards/frontier_coverage_20": 0.14951011687517166,
"rewards/frontier_coverage_25": 0.14951011687517166,
"rewards/frontier_coverage_5": 0.14951011687517166,
"rewards/frontier_ece_reward": 0.023793780989944936,
"signal/accuracy_reward/centered_abs_mean": 0.129779052734375,
"signal/accuracy_reward/group_std_mean": 0.17483537197113036,
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0648895263671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0648895263671875,
"signal/advantage_abs_mean": 0.0853941187262535,
"signal/advantage_pre_scale_abs_mean": 0.0853941187262535,
"signal/advantage_pre_scale_std": 0.1344393938779831,
"signal/advantage_std": 0.1344393938779831,
"signal/brier_reward/centered_abs_mean": 0.1850574344396591,
"signal/brier_reward/group_std_mean": 0.23437364101409913,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02313217930495739,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02313217930495739,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0607163667678833,
"signal/confidence_uniqueness_reward/group_std_mean": 0.07633327543735505,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007589545845985412,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007589545845985412,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.003866990143433213,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035295146983116863,
"signal/frontier_aurc_reward/group_std_mean": 0.005266764014959335,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.317830993793905e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.317830993793905e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20874929428100586,
"signal/frontier_coverage_1/group_std_mean": 0.2724692106246948,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_10/centered_abs_mean": 0.20874929428100586,
"signal/frontier_coverage_10/group_std_mean": 0.2724692106246948,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_15/centered_abs_mean": 0.20874929428100586,
"signal/frontier_coverage_15/group_std_mean": 0.2724692106246948,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_20/centered_abs_mean": 0.20874929428100586,
"signal/frontier_coverage_20/group_std_mean": 0.2724692106246948,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_25/centered_abs_mean": 0.20874929428100586,
"signal/frontier_coverage_25/group_std_mean": 0.2724692106246948,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_5/centered_abs_mean": 0.20874929428100586,
"signal/frontier_coverage_5/group_std_mean": 0.2724692106246948,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003736612340435386,
"signal/frontier_ece_reward/centered_abs_mean": 0.056064750999212265,
"signal/frontier_ece_reward/group_std_mean": 0.07063852250576019,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007008093874901533,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007008093874901533,
"step": 70
},
{
"calibration/aurc": 0.3625857217361375,
"calibration/batch_distribution_entropy": 0.9289938394921892,
"calibration/buffer_distribution_entropy": 0.9335108002509734,
"calibration/confidence_entropy": 0.40780732318505564,
"calibration/coverage@0%": 0.0125,
"calibration/coverage@1%": 0.0125,
"calibration/coverage@10%": 0.10625,
"calibration/coverage@15%": 0.188671875,
"calibration/coverage@20%": 0.28954791462818,
"calibration/coverage@25%": 0.31848779965753427,
"calibration/coverage@30%": 0.3583766511741683,
"calibration/coverage@5%": 0.0125,
"calibration/ece": 0.18316088370397798,
"calibration/mean_confidence": 0.5532851800314939,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1052.6,
"completions/max_terminated_length": 372.2,
"completions/mean_length": 155.53154296875,
"completions/mean_terminated_length": 155.12725524902345,
"completions/min_length": 71.6,
"completions/min_terminated_length": 71.6,
"epoch": 0.24,
"grad_norm": 0.0017300838371738791,
"learning_rate": 1e-06,
"loss": 0.0015,
"num_tokens": 250752979.0,
"reward": 1.0221168398857117,
"reward_std": 0.11391993910074234,
"rewards/accuracy_reward": 0.595703125,
"rewards/brier_reward": 0.7676220774650574,
"rewards/confidence_uniqueness_reward": 0.9071184039115906,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002962377923540771,
"rewards/frontier_coverage_1": 0.10575190596282483,
"rewards/frontier_coverage_10": 0.10575190596282483,
"rewards/frontier_coverage_15": 0.10575190596282483,
"rewards/frontier_coverage_20": 0.10575190596282483,
"rewards/frontier_coverage_25": 0.10575190596282483,
"rewards/frontier_coverage_5": 0.10575190596282483,
"rewards/frontier_ece_reward": 0.030115915276110173,
"signal/accuracy_reward/centered_abs_mean": 0.14744873046875,
"signal/accuracy_reward/group_std_mean": 0.1908715397119522,
"signal/accuracy_reward/group_zero_std_frac": 0.46875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.073724365234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.073724365234375,
"signal/advantage_abs_mean": 0.08909272402524948,
"signal/advantage_pre_scale_abs_mean": 0.08909272402524948,
"signal/advantage_pre_scale_std": 0.13636419773101807,
"signal/advantage_std": 0.13636419773101807,
"signal/brier_reward/centered_abs_mean": 0.1880528837442398,
"signal/brier_reward/group_std_mean": 0.2371793121099472,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023506610468029977,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.023506610468029977,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.053541189432144164,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06544121354818344,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0066926486790180205,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0066926486790180205,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032780596986413,
"signal/frontier_aurc_reward/group_std_mean": 0.004835722874850035,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.867726795258932e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.867726795258932e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21991788744926452,
"signal/frontier_coverage_1/group_std_mean": 0.28908875584602356,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_10/centered_abs_mean": 0.21991788744926452,
"signal/frontier_coverage_10/group_std_mean": 0.28908875584602356,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_15/centered_abs_mean": 0.21991788744926452,
"signal/frontier_coverage_15/group_std_mean": 0.28908875584602356,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_20/centered_abs_mean": 0.21991788744926452,
"signal/frontier_coverage_20/group_std_mean": 0.28908875584602356,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_25/centered_abs_mean": 0.21991788744926452,
"signal/frontier_coverage_25/group_std_mean": 0.28908875584602356,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_5/centered_abs_mean": 0.21991788744926452,
"signal/frontier_coverage_5/group_std_mean": 0.28908875584602356,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003936530090868473,
"signal/frontier_ece_reward/centered_abs_mean": 0.05272270888090134,
"signal/frontier_ece_reward/group_std_mean": 0.06555544286966324,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006590338610112667,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006590338610112667,
"step": 75
},
{
"calibration/aurc": 0.2991545526086894,
"calibration/batch_distribution_entropy": 0.917100104209035,
"calibration/buffer_distribution_entropy": 0.9366676884977749,
"calibration/confidence_entropy": 0.374372114525325,
"calibration/coverage@0%": 0.003515625,
"calibration/coverage@1%": 0.003515625,
"calibration/coverage@10%": 0.21171875,
"calibration/coverage@15%": 0.30390625,
"calibration/coverage@20%": 0.38515625,
"calibration/coverage@25%": 0.48828125,
"calibration/coverage@30%": 0.59453125,
"calibration/coverage@5%": 0.05703125,
"calibration/ece": 0.13661148577754403,
"calibration/mean_confidence": 0.5016744681145004,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 618.8,
"completions/max_terminated_length": 421.6,
"completions/mean_length": 152.70751953125,
"completions/mean_terminated_length": 152.5725067138672,
"completions/min_length": 72.2,
"completions/min_terminated_length": 72.2,
"epoch": 0.256,
"grad_norm": 0.004246581345796585,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 267371520.0,
"reward": 1.0221374034881592,
"reward_std": 0.10346800982952117,
"rewards/accuracy_reward": 0.58359375,
"rewards/brier_reward": 0.7830183148384094,
"rewards/confidence_uniqueness_reward": 0.9105902791023255,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002707870095036924,
"rewards/frontier_coverage_1": 0.14056457132101058,
"rewards/frontier_coverage_10": 0.14056457132101058,
"rewards/frontier_coverage_15": 0.14056457132101058,
"rewards/frontier_coverage_20": 0.14056457132101058,
"rewards/frontier_coverage_25": 0.14056457132101058,
"rewards/frontier_coverage_5": 0.14056457132101058,
"rewards/frontier_ece_reward": 0.029511995241045953,
"signal/accuracy_reward/centered_abs_mean": 0.1346435546875,
"signal/accuracy_reward/group_std_mean": 0.17472511231899263,
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06732177734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06732177734375,
"signal/advantage_abs_mean": 0.07942169755697251,
"signal/advantage_pre_scale_abs_mean": 0.07942169755697251,
"signal/advantage_pre_scale_std": 0.12556920498609542,
"signal/advantage_std": 0.12556920498609542,
"signal/brier_reward/centered_abs_mean": 0.18081169128417968,
"signal/brier_reward/group_std_mean": 0.22884117662906647,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02260146141052246,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02260146141052246,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05464339852333069,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06681963428854942,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006830424815416336,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006830424815416336,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003052409226074815,
"signal/frontier_aurc_reward/group_std_mean": 0.00458022365346551,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4638121946481986e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4638121946481986e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22503261864185334,
"signal/frontier_coverage_1/group_std_mean": 0.29037556052207947,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_10/centered_abs_mean": 0.22503261864185334,
"signal/frontier_coverage_10/group_std_mean": 0.29037556052207947,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_15/centered_abs_mean": 0.22503261864185334,
"signal/frontier_coverage_15/group_std_mean": 0.29037556052207947,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_20/centered_abs_mean": 0.22503261864185334,
"signal/frontier_coverage_20/group_std_mean": 0.29037556052207947,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_25/centered_abs_mean": 0.22503261864185334,
"signal/frontier_coverage_25/group_std_mean": 0.29037556052207947,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_5/centered_abs_mean": 0.22503261864185334,
"signal/frontier_coverage_5/group_std_mean": 0.29037556052207947,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004028083616867661,
"signal/frontier_ece_reward/centered_abs_mean": 0.04379315301775932,
"signal/frontier_ece_reward/group_std_mean": 0.056234460324048996,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005474144127219915,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005474144127219915,
"step": 80
},
{
"calibration/aurc": 0.363860036835348,
"calibration/batch_distribution_entropy": 0.9570702198192571,
"calibration/buffer_distribution_entropy": 0.9406893351280925,
"calibration/confidence_entropy": 0.4103928165246762,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.092578125,
"calibration/coverage@15%": 0.16953125,
"calibration/coverage@20%": 0.246875,
"calibration/coverage@25%": 0.2890625,
"calibration/coverage@30%": 0.352734375,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.16451098025568126,
"calibration/mean_confidence": 0.4967080330168347,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 592.6,
"completions/max_terminated_length": 364.2,
"completions/mean_length": 159.1400390625,
"completions/mean_terminated_length": 158.87093811035157,
"completions/min_length": 65.8,
"completions/min_terminated_length": 65.8,
"epoch": 0.272,
"grad_norm": 0.006795101799070835,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 283966810.0,
"reward": 1.0148004293441772,
"reward_std": 0.10174374878406525,
"rewards/accuracy_reward": 0.56806640625,
"rewards/brier_reward": 0.7736626982688903,
"rewards/confidence_uniqueness_reward": 0.9271513104438782,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0029262469615787268,
"rewards/frontier_coverage_1": 0.14347197711467743,
"rewards/frontier_coverage_10": 0.14347197711467743,
"rewards/frontier_coverage_15": 0.14347197711467743,
"rewards/frontier_coverage_20": 0.14347197711467743,
"rewards/frontier_coverage_25": 0.14347197711467743,
"rewards/frontier_coverage_5": 0.14347197711467743,
"rewards/frontier_ece_reward": 0.024034282192587852,
"signal/accuracy_reward/centered_abs_mean": 0.126641845703125,
"signal/accuracy_reward/group_std_mean": 0.1672067701816559,
"signal/accuracy_reward/group_zero_std_frac": 0.51875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0633209228515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0633209228515625,
"signal/advantage_abs_mean": 0.0784185141324997,
"signal/advantage_pre_scale_abs_mean": 0.0784185141324997,
"signal/advantage_pre_scale_std": 0.12320059090852738,
"signal/advantage_std": 0.12320059090852738,
"signal/brier_reward/centered_abs_mean": 0.1773090809583664,
"signal/brier_reward/group_std_mean": 0.2246845543384552,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0221636351197958,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0221636351197958,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.044048906117677686,
"signal/confidence_uniqueness_reward/group_std_mean": 0.055270757526159286,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005506113264709711,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005506113264709711,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002721975650638342,
"signal/frontier_aurc_reward/group_std_mean": 0.004123077914118767,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.872336139669642e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.872336139669642e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2235881805419922,
"signal/frontier_coverage_1/group_std_mean": 0.2899299919605255,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_10/centered_abs_mean": 0.2235881805419922,
"signal/frontier_coverage_10/group_std_mean": 0.2899299919605255,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_15/centered_abs_mean": 0.2235881805419922,
"signal/frontier_coverage_15/group_std_mean": 0.2899299919605255,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_20/centered_abs_mean": 0.2235881805419922,
"signal/frontier_coverage_20/group_std_mean": 0.2899299919605255,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_25/centered_abs_mean": 0.2235881805419922,
"signal/frontier_coverage_25/group_std_mean": 0.2899299919605255,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_5/centered_abs_mean": 0.2235881805419922,
"signal/frontier_coverage_5/group_std_mean": 0.2899299919605255,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004002228379249573,
"signal/frontier_ece_reward/centered_abs_mean": 0.040379713475704196,
"signal/frontier_ece_reward/group_std_mean": 0.05107036232948303,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050474641844630245,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050474641844630245,
"step": 85
},
{
"calibration/aurc": 0.36422554783602823,
"calibration/batch_distribution_entropy": 0.9481445874016821,
"calibration/buffer_distribution_entropy": 0.9444843703339039,
"calibration/confidence_entropy": 0.4098080527850782,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.061328125,
"calibration/coverage@15%": 0.07109375,
"calibration/coverage@20%": 0.10078125,
"calibration/coverage@25%": 0.11484375,
"calibration/coverage@30%": 0.2484375,
"calibration/coverage@5%": 0.048046875,
"calibration/ece": 0.15095390849412324,
"calibration/mean_confidence": 0.5471646791933227,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1045.4,
"completions/max_terminated_length": 389.8,
"completions/mean_length": 155.80673828125,
"completions/mean_terminated_length": 155.402783203125,
"completions/min_length": 74.4,
"completions/min_terminated_length": 74.4,
"epoch": 0.288,
"grad_norm": 0.005369322374463081,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 300520447.0,
"reward": 1.0132103562355042,
"reward_std": 0.10634560137987137,
"rewards/accuracy_reward": 0.56318359375,
"rewards/brier_reward": 0.7749075770378113,
"rewards/confidence_uniqueness_reward": 0.9406777501106263,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0030640484765172005,
"rewards/frontier_coverage_1": 0.13456785976886748,
"rewards/frontier_coverage_10": 0.13456785976886748,
"rewards/frontier_coverage_15": 0.13456785976886748,
"rewards/frontier_coverage_20": 0.13456785976886748,
"rewards/frontier_coverage_25": 0.13456785976886748,
"rewards/frontier_coverage_5": 0.13456785976886748,
"rewards/frontier_ece_reward": 0.023743505217134954,
"signal/accuracy_reward/centered_abs_mean": 0.137371826171875,
"signal/accuracy_reward/group_std_mean": 0.18289859890937804,
"signal/accuracy_reward/group_zero_std_frac": 0.471875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0686859130859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0686859130859375,
"signal/advantage_abs_mean": 0.08051378279924393,
"signal/advantage_pre_scale_abs_mean": 0.08051378279924393,
"signal/advantage_pre_scale_std": 0.12663117051124573,
"signal/advantage_std": 0.12663117051124573,
"signal/brier_reward/centered_abs_mean": 0.1794063478708267,
"signal/brier_reward/group_std_mean": 0.22867206931114198,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02242579348385334,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02242579348385334,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03527098894119263,
"signal/confidence_uniqueness_reward/group_std_mean": 0.044175655394792554,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004408873617649078,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004408873617649078,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003065389487892389,
"signal/frontier_aurc_reward/group_std_mean": 0.004888421203941107,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4870470921741796e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4870470921741796e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22263219356536865,
"signal/frontier_coverage_1/group_std_mean": 0.2907342195510864,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_10/centered_abs_mean": 0.22263219356536865,
"signal/frontier_coverage_10/group_std_mean": 0.2907342195510864,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_15/centered_abs_mean": 0.22263219356536865,
"signal/frontier_coverage_15/group_std_mean": 0.2907342195510864,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_20/centered_abs_mean": 0.22263219356536865,
"signal/frontier_coverage_20/group_std_mean": 0.2907342195510864,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_25/centered_abs_mean": 0.22263219356536865,
"signal/frontier_coverage_25/group_std_mean": 0.2907342195510864,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_5/centered_abs_mean": 0.22263219356536865,
"signal/frontier_coverage_5/group_std_mean": 0.2907342195510864,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003985116025432944,
"signal/frontier_ece_reward/centered_abs_mean": 0.04063198119401932,
"signal/frontier_ece_reward/group_std_mean": 0.05170249417424202,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005078997649252415,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005078997649252415,
"step": 90
},
{
"calibration/aurc": 0.29253903941152404,
"calibration/batch_distribution_entropy": 0.9048455136474193,
"calibration/buffer_distribution_entropy": 0.9462939197873748,
"calibration/confidence_entropy": 0.39088382746890016,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.1375,
"calibration/coverage@15%": 0.1875,
"calibration/coverage@20%": 0.257421875,
"calibration/coverage@25%": 0.36213460127201563,
"calibration/coverage@30%": 0.5430260824363993,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.14318374226956557,
"calibration/mean_confidence": 0.587406877153499,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 419.4,
"completions/max_terminated_length": 419.4,
"completions/mean_length": 160.600390625,
"completions/mean_terminated_length": 160.600390625,
"completions/min_length": 75.6,
"completions/min_terminated_length": 75.6,
"epoch": 0.304,
"grad_norm": 0.00885615311563015,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 317094947.0,
"reward": 1.028617262840271,
"reward_std": 0.09624975174665451,
"rewards/accuracy_reward": 0.59638671875,
"rewards/brier_reward": 0.7746265411376954,
"rewards/confidence_uniqueness_reward": 0.9423886299133301,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003213143954053521,
"rewards/frontier_coverage_1": 0.12373490929603577,
"rewards/frontier_coverage_10": 0.12373490929603577,
"rewards/frontier_coverage_15": 0.12373490929603577,
"rewards/frontier_coverage_20": 0.12373490929603577,
"rewards/frontier_coverage_25": 0.12373490929603577,
"rewards/frontier_coverage_5": 0.12373490929603577,
"rewards/frontier_ece_reward": 0.021694989316165446,
"signal/accuracy_reward/centered_abs_mean": 0.119732666015625,
"signal/accuracy_reward/group_std_mean": 0.15952616930007935,
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0598663330078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0598663330078125,
"signal/advantage_abs_mean": 0.07218151390552521,
"signal/advantage_pre_scale_abs_mean": 0.07218151390552521,
"signal/advantage_pre_scale_std": 0.11681736111640931,
"signal/advantage_std": 0.11681736111640931,
"signal/brier_reward/centered_abs_mean": 0.17017588913440704,
"signal/brier_reward/group_std_mean": 0.21725533604621888,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02127198614180088,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02127198614180088,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03307611271739006,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04140571765601635,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004134514089673758,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004134514089673758,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002763637388125062,
"signal/frontier_aurc_reward/group_std_mean": 0.0042175163049250845,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9469107761979106e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9469107761979106e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2136286973953247,
"signal/frontier_coverage_1/group_std_mean": 0.2769513875246048,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_10/centered_abs_mean": 0.2136286973953247,
"signal/frontier_coverage_10/group_std_mean": 0.2769513875246048,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_15/centered_abs_mean": 0.2136286973953247,
"signal/frontier_coverage_15/group_std_mean": 0.2769513875246048,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_20/centered_abs_mean": 0.2136286973953247,
"signal/frontier_coverage_20/group_std_mean": 0.2769513875246048,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_25/centered_abs_mean": 0.2136286973953247,
"signal/frontier_coverage_25/group_std_mean": 0.2769513875246048,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_5/centered_abs_mean": 0.2136286973953247,
"signal/frontier_coverage_5/group_std_mean": 0.2769513875246048,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038239536806941032,
"signal/frontier_ece_reward/centered_abs_mean": 0.03785905465483665,
"signal/frontier_ece_reward/group_std_mean": 0.04792718142271042,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004732381831854582,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004732381831854582,
"step": 95
},
{
"calibration/aurc": 0.24936746292749862,
"calibration/batch_distribution_entropy": 0.898710830523522,
"calibration/buffer_distribution_entropy": 0.9460732940566577,
"calibration/confidence_entropy": 0.3851945609629142,
"calibration/coverage@0%": 0.015264187866927592,
"calibration/coverage@1%": 0.015264187866927592,
"calibration/coverage@10%": 0.2279705846379648,
"calibration/coverage@15%": 0.3795835371819961,
"calibration/coverage@20%": 0.45266481164383554,
"calibration/coverage@25%": 0.5382422639432486,
"calibration/coverage@30%": 0.6234313845401174,
"calibration/coverage@5%": 0.10597251100782779,
"calibration/ece": 0.1376192145922726,
"calibration/mean_confidence": 0.6140883395295473,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 647.4,
"completions/max_terminated_length": 489.6,
"completions/mean_length": 164.50380859375,
"completions/mean_terminated_length": 164.3696044921875,
"completions/min_length": 68.4,
"completions/min_terminated_length": 68.4,
"epoch": 0.32,
"grad_norm": 0.003183668712154031,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 333868170.0,
"reward": 1.037460708618164,
"reward_std": 0.08647488206624984,
"rewards/accuracy_reward": 0.60302734375,
"rewards/brier_reward": 0.8063218474388123,
"rewards/confidence_uniqueness_reward": 0.944849681854248,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0029652828816324472,
"rewards/frontier_coverage_1": 0.1294178381562233,
"rewards/frontier_coverage_10": 0.1294178381562233,
"rewards/frontier_coverage_15": 0.1294178381562233,
"rewards/frontier_coverage_20": 0.1294178381562233,
"rewards/frontier_coverage_25": 0.1294178381562233,
"rewards/frontier_coverage_5": 0.1294178381562233,
"rewards/frontier_ece_reward": 0.02797740586102009,
"signal/accuracy_reward/centered_abs_mean": 0.090289306640625,
"signal/accuracy_reward/group_std_mean": 0.12296751439571381,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451446533203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451446533203125,
"signal/advantage_abs_mean": 0.06498634666204453,
"signal/advantage_pre_scale_abs_mean": 0.06498634666204453,
"signal/advantage_pre_scale_std": 0.11145332753658295,
"signal/advantage_std": 0.11145332753658295,
"signal/brier_reward/centered_abs_mean": 0.1528707653284073,
"signal/brier_reward/group_std_mean": 0.19931194186210632,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01910884566605091,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01910884566605091,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030894938856363297,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03997356966137886,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003861867357045412,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003861867357045412,
"signal/format_reward/centered_abs_mean": 0.001123046875,
"signal/format_reward/group_std_mean": 0.0029782545287162067,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005615234375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002860796870663762,
"signal/frontier_aurc_reward/group_std_mean": 0.0042446996085345745,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.120826244819909e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.120826244819909e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17833363115787507,
"signal/frontier_coverage_1/group_std_mean": 0.23469134867191316,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_10/centered_abs_mean": 0.17833363115787507,
"signal/frontier_coverage_10/group_std_mean": 0.23469134867191316,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_15/centered_abs_mean": 0.17833363115787507,
"signal/frontier_coverage_15/group_std_mean": 0.23469134867191316,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_20/centered_abs_mean": 0.17833363115787507,
"signal/frontier_coverage_20/group_std_mean": 0.23469134867191316,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_25/centered_abs_mean": 0.17833363115787507,
"signal/frontier_coverage_25/group_std_mean": 0.23469134867191316,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_5/centered_abs_mean": 0.17833363115787507,
"signal/frontier_coverage_5/group_std_mean": 0.23469134867191316,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031921718269586562,
"signal/frontier_ece_reward/centered_abs_mean": 0.037183419615030286,
"signal/frontier_ece_reward/group_std_mean": 0.04669267162680626,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004647927451878786,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004647927451878786,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.5361608562879745,
"eval_calibration/batch_distribution_entropy": 0.8548767773066339,
"eval_calibration/buffer_distribution_entropy": 0.9456258884469577,
"eval_calibration/confidence_entropy": 0.3856233495527517,
"eval_calibration/coverage@0%": 0.0234375,
"eval_calibration/coverage@1%": 0.0234375,
"eval_calibration/coverage@10%": 0.0234375,
"eval_calibration/coverage@15%": 0.0859375,
"eval_calibration/coverage@20%": 0.1015625,
"eval_calibration/coverage@25%": 0.1171875,
"eval_calibration/coverage@30%": 0.125,
"eval_calibration/coverage@5%": 0.0234375,
"eval_calibration/ece": 0.2574609375,
"eval_calibration/mean_confidence": 0.5250390625,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 313.0,
"eval_completions/max_terminated_length": 313.0,
"eval_completions/mean_length": 166.9853172302246,
"eval_completions/mean_terminated_length": 166.9853172302246,
"eval_completions/min_length": 93.75,
"eval_completions/min_terminated_length": 93.75,
"eval_loss": 0.0,
"eval_num_tokens": 333868170.0,
"eval_reward": 0.9285456091165543,
"eval_reward_std": 0.2409592606127262,
"eval_rewards/accuracy_reward": 0.3984375,
"eval_rewards/brier_reward": 0.7509243190288544,
"eval_rewards/confidence_uniqueness_reward": 0.890869140625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.004297213105019182,
"eval_rewards/frontier_coverage_1": 0.20684602111577988,
"eval_rewards/frontier_coverage_10": 0.20684602111577988,
"eval_rewards/frontier_coverage_15": 0.20684602111577988,
"eval_rewards/frontier_coverage_20": 0.20684602111577988,
"eval_rewards/frontier_coverage_25": 0.20684602111577988,
"eval_rewards/frontier_coverage_5": 0.20684602111577988,
"eval_rewards/frontier_ece_reward": 0.015714747074525803,
"eval_runtime": 17.5982,
"eval_samples_per_second": 28.412,
"eval_signal/accuracy_reward/centered_abs_mean": 0.466064453125,
"eval_signal/accuracy_reward/group_std_mean": 0.489865705370903,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2330322265625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2330322265625,
"eval_signal/advantage_abs_mean": 0.21615619584918022,
"eval_signal/advantage_pre_scale_abs_mean": 0.21615619584918022,
"eval_signal/advantage_pre_scale_std": 0.23838016018271446,
"eval_signal/advantage_std": 0.23838016018271446,
"eval_signal/brier_reward/centered_abs_mean": 0.26976919919252396,
"eval_signal/brier_reward/group_std_mean": 0.3225868046283722,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033721149899065495,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.033721149899065495,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0496673583984375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06072596646845341,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0062084197998046875,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0062084197998046875,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005205620895139873,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008366801775991917,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.31806080188835e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.31806080188835e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36348968744277954,
"eval_signal/frontier_coverage_1/group_std_mean": 0.453485868871212,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.36348968744277954,
"eval_signal/frontier_coverage_10/group_std_mean": 0.453485868871212,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.36348968744277954,
"eval_signal/frontier_coverage_15/group_std_mean": 0.453485868871212,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.36348968744277954,
"eval_signal/frontier_coverage_20/group_std_mean": 0.453485868871212,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.36348968744277954,
"eval_signal/frontier_coverage_25/group_std_mean": 0.453485868871212,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36348968744277954,
"eval_signal/frontier_coverage_5/group_std_mean": 0.453485868871212,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00650646525900811,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05878347251564264,
"eval_signal/frontier_ece_reward/group_std_mean": 0.07752788066864014,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00734793406445533,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00734793406445533,
"eval_steps_per_second": 0.227,
"step": 100
},
{
"calibration/aurc": 0.3170759678648111,
"calibration/batch_distribution_entropy": 0.9268662687477087,
"calibration/buffer_distribution_entropy": 0.9480795484053474,
"calibration/confidence_entropy": 0.4027125187855171,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.008203125,
"calibration/coverage@15%": 0.06001657662082514,
"calibration/coverage@20%": 0.1137394093811395,
"calibration/coverage@25%": 0.34513675712180747,
"calibration/coverage@30%": 0.5636135498526522,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.13889236064162716,
"calibration/mean_confidence": 0.5570489445717209,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 922.4,
"completions/max_terminated_length": 491.0,
"completions/mean_length": 168.534375,
"completions/mean_terminated_length": 168.26749267578126,
"completions/min_length": 63.2,
"completions/min_terminated_length": 63.2,
"epoch": 0.336,
"grad_norm": 0.011471702717244625,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 350316394.0,
"reward": 1.0325323581695556,
"reward_std": 0.09860386103391647,
"rewards/accuracy_reward": 0.5978515625,
"rewards/brier_reward": 0.7879295349121094,
"rewards/confidence_uniqueness_reward": 0.9501477599143981,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0029000297654420137,
"rewards/frontier_coverage_1": 0.12584335058927537,
"rewards/frontier_coverage_10": 0.12584335058927537,
"rewards/frontier_coverage_15": 0.12584335058927537,
"rewards/frontier_coverage_20": 0.12584335058927537,
"rewards/frontier_coverage_25": 0.12584335058927537,
"rewards/frontier_coverage_5": 0.12584335058927537,
"rewards/frontier_ece_reward": 0.02501910924911499,
"signal/accuracy_reward/centered_abs_mean": 0.11759033203125,
"signal/accuracy_reward/group_std_mean": 0.1579001486301422,
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058795166015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.058795166015625,
"signal/advantage_abs_mean": 0.07503360360860825,
"signal/advantage_pre_scale_abs_mean": 0.07503360360860825,
"signal/advantage_pre_scale_std": 0.12452945411205292,
"signal/advantage_std": 0.12452945411205292,
"signal/brier_reward/centered_abs_mean": 0.16325247883796692,
"signal/brier_reward/group_std_mean": 0.20753192603588105,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020406559854745866,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020406559854745866,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02659556120634079,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03490939736366272,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033244451507925986,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033244451507925986,
"signal/format_reward/centered_abs_mean": 0.000909423828125,
"signal/format_reward/group_std_mean": 0.002030306123197079,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004547119140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004547119140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027562730945646765,
"signal/frontier_aurc_reward/group_std_mean": 0.004085430596023798,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.933728851028718e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.933728851028718e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18382967710494996,
"signal/frontier_coverage_1/group_std_mean": 0.24163539111614227,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_10/centered_abs_mean": 0.18382967710494996,
"signal/frontier_coverage_10/group_std_mean": 0.24163539111614227,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_15/centered_abs_mean": 0.18382967710494996,
"signal/frontier_coverage_15/group_std_mean": 0.24163539111614227,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_20/centered_abs_mean": 0.18382967710494996,
"signal/frontier_coverage_20/group_std_mean": 0.24163539111614227,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_25/centered_abs_mean": 0.18382967710494996,
"signal/frontier_coverage_25/group_std_mean": 0.24163539111614227,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_5/centered_abs_mean": 0.18382967710494996,
"signal/frontier_coverage_5/group_std_mean": 0.24163539111614227,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032905511558055877,
"signal/frontier_ece_reward/centered_abs_mean": 0.034998999536037446,
"signal/frontier_ece_reward/group_std_mean": 0.04407136589288711,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004374874942004681,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004374874942004681,
"step": 105
},
{
"calibration/aurc": 0.3232366665037657,
"calibration/batch_distribution_entropy": 0.8812064271537494,
"calibration/buffer_distribution_entropy": 0.9553933684063487,
"calibration/confidence_entropy": 0.35902633675891904,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.16585815190802347,
"calibration/coverage@15%": 0.24369801859099804,
"calibration/coverage@20%": 0.3855461105675147,
"calibration/coverage@25%": 0.44301614481409,
"calibration/coverage@30%": 0.5180536020058708,
"calibration/coverage@5%": 0.018003913894324854,
"calibration/ece": 0.13547729630579644,
"calibration/mean_confidence": 0.49073865731901967,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 630.8,
"completions/max_terminated_length": 398.8,
"completions/mean_length": 170.27373046875,
"completions/mean_terminated_length": 170.14059448242188,
"completions/min_length": 78.4,
"completions/min_terminated_length": 78.4,
"epoch": 0.352,
"grad_norm": 0.012238552793860435,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 367320413.0,
"reward": 1.001007616519928,
"reward_std": 0.09876850098371506,
"rewards/accuracy_reward": 0.530078125,
"rewards/brier_reward": 0.7849451780319214,
"rewards/confidence_uniqueness_reward": 0.943973433971405,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0035563561599701644,
"rewards/frontier_coverage_1": 0.16559779942035674,
"rewards/frontier_coverage_10": 0.16559779942035674,
"rewards/frontier_coverage_15": 0.16559779942035674,
"rewards/frontier_coverage_20": 0.16559779942035674,
"rewards/frontier_coverage_25": 0.16559779942035674,
"rewards/frontier_coverage_5": 0.16559779942035674,
"rewards/frontier_ece_reward": 0.018229612335562705,
"signal/accuracy_reward/centered_abs_mean": 0.1173095703125,
"signal/accuracy_reward/group_std_mean": 0.15779185593128203,
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05865478515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05865478515625,
"signal/advantage_abs_mean": 0.07470797747373581,
"signal/advantage_pre_scale_abs_mean": 0.07470797747373581,
"signal/advantage_pre_scale_std": 0.1232941284775734,
"signal/advantage_std": 0.1232941284775734,
"signal/brier_reward/centered_abs_mean": 0.16526894867420197,
"signal/brier_reward/group_std_mean": 0.21336513757705688,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020658618584275246,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020658618584275246,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031228836625814438,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04048001915216446,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0039036045782268047,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0039036045782268047,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033744067884981634,
"signal/frontier_aurc_reward/group_std_mean": 0.0051686098799109455,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.040188018232584e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.040188018232584e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1909989595413208,
"signal/frontier_coverage_1/group_std_mean": 0.2494662880897522,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_10/centered_abs_mean": 0.1909989595413208,
"signal/frontier_coverage_10/group_std_mean": 0.2494662880897522,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_15/centered_abs_mean": 0.1909989595413208,
"signal/frontier_coverage_15/group_std_mean": 0.2494662880897522,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_20/centered_abs_mean": 0.1909989595413208,
"signal/frontier_coverage_20/group_std_mean": 0.2494662880897522,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_25/centered_abs_mean": 0.1909989595413208,
"signal/frontier_coverage_25/group_std_mean": 0.2494662880897522,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_5/centered_abs_mean": 0.1909989595413208,
"signal/frontier_coverage_5/group_std_mean": 0.2494662880897522,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034188813529908656,
"signal/frontier_ece_reward/centered_abs_mean": 0.030147189274430275,
"signal/frontier_ece_reward/group_std_mean": 0.03832725360989571,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037683986593037844,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037683986593037844,
"step": 110
},
{
"calibration/aurc": 0.3797556873444166,
"calibration/batch_distribution_entropy": 0.8650275868499007,
"calibration/buffer_distribution_entropy": 0.9613778439528119,
"calibration/confidence_entropy": 0.3575837773714898,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.061328125,
"calibration/coverage@15%": 0.1,
"calibration/coverage@20%": 0.26171875,
"calibration/coverage@25%": 0.326953125,
"calibration/coverage@30%": 0.375,
"calibration/coverage@5%": 0.0078125,
"calibration/ece": 0.1910163477538498,
"calibration/mean_confidence": 0.5746932088470622,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 897.8,
"completions/max_terminated_length": 462.4,
"completions/mean_length": 172.3744140625,
"completions/mean_terminated_length": 172.1084747314453,
"completions/min_length": 67.8,
"completions/min_terminated_length": 67.8,
"epoch": 0.368,
"grad_norm": 0.022365767508745193,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 384150999.0,
"reward": 1.0263174772262573,
"reward_std": 0.09078062623739243,
"rewards/accuracy_reward": 0.578125,
"rewards/brier_reward": 0.8040992617607117,
"rewards/confidence_uniqueness_reward": 0.9418166399002075,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0037387159187346696,
"rewards/frontier_coverage_1": 0.1589544117450714,
"rewards/frontier_coverage_10": 0.1589544117450714,
"rewards/frontier_coverage_15": 0.1589544117450714,
"rewards/frontier_coverage_20": 0.1589544117450714,
"rewards/frontier_coverage_25": 0.1589544117450714,
"rewards/frontier_coverage_5": 0.1589544117450714,
"rewards/frontier_ece_reward": 0.01803905926644802,
"signal/accuracy_reward/centered_abs_mean": 0.0984130859375,
"signal/accuracy_reward/group_std_mean": 0.1334820196032524,
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04920654296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04920654296875,
"signal/advantage_abs_mean": 0.06783413365483285,
"signal/advantage_pre_scale_abs_mean": 0.06783413365483285,
"signal/advantage_pre_scale_std": 0.11692542880773545,
"signal/advantage_std": 0.11692542880773545,
"signal/brier_reward/centered_abs_mean": 0.15209992229938507,
"signal/brier_reward/group_std_mean": 0.19859039783477783,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019012490287423134,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019012490287423134,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.032923289388418195,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04358488842844963,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004115411173552274,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004115411173552274,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003569593699648976,
"signal/frontier_aurc_reward/group_std_mean": 0.005398597475141287,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.389572881744243e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.389572881744243e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17154284417629242,
"signal/frontier_coverage_1/group_std_mean": 0.22746075689792633,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_10/centered_abs_mean": 0.17154284417629242,
"signal/frontier_coverage_10/group_std_mean": 0.22746075689792633,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_15/centered_abs_mean": 0.17154284417629242,
"signal/frontier_coverage_15/group_std_mean": 0.22746075689792633,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_20/centered_abs_mean": 0.17154284417629242,
"signal/frontier_coverage_20/group_std_mean": 0.22746075689792633,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_25/centered_abs_mean": 0.17154284417629242,
"signal/frontier_coverage_25/group_std_mean": 0.22746075689792633,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_5/centered_abs_mean": 0.17154284417629242,
"signal/frontier_coverage_5/group_std_mean": 0.22746075689792633,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030706167686730622,
"signal/frontier_ece_reward/centered_abs_mean": 0.023829102888703345,
"signal/frontier_ece_reward/group_std_mean": 0.030207440629601477,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002978637861087918,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002978637861087918,
"step": 115
},
{
"calibration/aurc": 0.34978752891207854,
"calibration/batch_distribution_entropy": 0.8743274420308669,
"calibration/buffer_distribution_entropy": 0.9651506073276552,
"calibration/confidence_entropy": 0.3524804429751578,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.15099070450097846,
"calibration/coverage@15%": 0.22602433953033269,
"calibration/coverage@20%": 0.2737088735322896,
"calibration/coverage@25%": 0.32334500366927593,
"calibration/coverage@30%": 0.3589125183463796,
"calibration/coverage@5%": 0.03287671232876712,
"calibration/ece": 0.15381430206719035,
"calibration/mean_confidence": 0.4920425182661553,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 913.8,
"completions/max_terminated_length": 489.8,
"completions/mean_length": 172.56142578125,
"completions/mean_terminated_length": 172.16313781738282,
"completions/min_length": 80.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.384,
"grad_norm": 0.01273356843739748,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 400774540.0,
"reward": 1.024810528755188,
"reward_std": 0.08978459835052491,
"rewards/accuracy_reward": 0.57666015625,
"rewards/brier_reward": 0.8028544902801513,
"rewards/confidence_uniqueness_reward": 0.9432775259017945,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0032439586240798233,
"rewards/frontier_coverage_1": 0.15212270766496658,
"rewards/frontier_coverage_10": 0.15212270766496658,
"rewards/frontier_coverage_15": 0.15212270766496658,
"rewards/frontier_coverage_20": 0.15212270766496658,
"rewards/frontier_coverage_25": 0.15212270766496658,
"rewards/frontier_coverage_5": 0.15212270766496658,
"rewards/frontier_ece_reward": 0.017425185441970824,
"signal/accuracy_reward/centered_abs_mean": 0.106951904296875,
"signal/accuracy_reward/group_std_mean": 0.1465958684682846,
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0534759521484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0534759521484375,
"signal/advantage_abs_mean": 0.06634962484240532,
"signal/advantage_pre_scale_abs_mean": 0.06634962484240532,
"signal/advantage_pre_scale_std": 0.11549568325281143,
"signal/advantage_std": 0.11549568325281143,
"signal/brier_reward/centered_abs_mean": 0.14517641365528106,
"signal/brier_reward/group_std_mean": 0.18895911276340485,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018147051706910132,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018147051706910132,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03095161318778992,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04060640558600426,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00386895164847374,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00386895164847374,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003309731697663665,
"signal/frontier_aurc_reward/group_std_mean": 0.00533204497769475,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.9244197473162785e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.9244197473162785e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17565982341766356,
"signal/frontier_coverage_1/group_std_mean": 0.23116243183612822,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_10/centered_abs_mean": 0.17565982341766356,
"signal/frontier_coverage_10/group_std_mean": 0.23116243183612822,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_15/centered_abs_mean": 0.17565982341766356,
"signal/frontier_coverage_15/group_std_mean": 0.23116243183612822,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_20/centered_abs_mean": 0.17565982341766356,
"signal/frontier_coverage_20/group_std_mean": 0.23116243183612822,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_25/centered_abs_mean": 0.17565982341766356,
"signal/frontier_coverage_25/group_std_mean": 0.23116243183612822,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_5/centered_abs_mean": 0.17565982341766356,
"signal/frontier_coverage_5/group_std_mean": 0.23116243183612822,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031443107407540085,
"signal/frontier_ece_reward/centered_abs_mean": 0.01915326751768589,
"signal/frontier_ece_reward/group_std_mean": 0.024320138990879057,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002394158439710736,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002394158439710736,
"step": 120
},
{
"calibration/aurc": 0.4435299753075933,
"calibration/batch_distribution_entropy": 0.9099935259372789,
"calibration/buffer_distribution_entropy": 0.9669285677408318,
"calibration/confidence_entropy": 0.3907481024090136,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.018003913894324854,
"calibration/coverage@25%": 0.04261328889432485,
"calibration/coverage@30%": 0.11968107876712328,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.22419826224495804,
"calibration/mean_confidence": 0.545322742176537,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 634.4,
"completions/max_terminated_length": 409.4,
"completions/mean_length": 170.94443359375,
"completions/mean_terminated_length": 170.81153564453126,
"completions/min_length": 85.8,
"completions/min_terminated_length": 85.8,
"epoch": 0.4,
"grad_norm": 0.0018827987369149923,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 417561459.0,
"reward": 1.020148515701294,
"reward_std": 0.09094813764095307,
"rewards/accuracy_reward": 0.572265625,
"rewards/brier_reward": 0.7909515976905823,
"rewards/confidence_uniqueness_reward": 0.9425244092941284,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.004015334136784077,
"rewards/frontier_coverage_1": 0.14759057611227036,
"rewards/frontier_coverage_10": 0.14759057611227036,
"rewards/frontier_coverage_15": 0.14759057611227036,
"rewards/frontier_coverage_20": 0.14759057611227036,
"rewards/frontier_coverage_25": 0.14238842576742172,
"rewards/frontier_coverage_5": 0.14759057611227036,
"rewards/frontier_ece_reward": 0.01355019873008132,
"signal/accuracy_reward/centered_abs_mean": 0.1117431640625,
"signal/accuracy_reward/group_std_mean": 0.15235530138015746,
"signal/accuracy_reward/group_zero_std_frac": 0.54375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05587158203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05587158203125,
"signal/advantage_abs_mean": 0.06790433377027512,
"signal/advantage_pre_scale_abs_mean": 0.06790433377027512,
"signal/advantage_pre_scale_std": 0.1179421067237854,
"signal/advantage_std": 0.1179421067237854,
"signal/brier_reward/centered_abs_mean": 0.15299761891365052,
"signal/brier_reward/group_std_mean": 0.19623776376247407,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019124702364206315,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019124702364206315,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03119489848613739,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03921703845262527,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038993623107671737,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038993623107671737,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.004192573670297861,
"signal/frontier_aurc_reward/group_std_mean": 0.006507566943764686,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.504706663894468e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.504706663894468e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17459246814250945,
"signal/frontier_coverage_1/group_std_mean": 0.22909881174564362,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_10/centered_abs_mean": 0.17459246814250945,
"signal/frontier_coverage_10/group_std_mean": 0.22909881174564362,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_15/centered_abs_mean": 0.17459246814250945,
"signal/frontier_coverage_15/group_std_mean": 0.22909881174564362,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_20/centered_abs_mean": 0.17459246814250945,
"signal/frontier_coverage_20/group_std_mean": 0.22909881174564362,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_25/centered_abs_mean": 0.1689872920513153,
"signal/frontier_coverage_25/group_std_mean": 0.22179524898529052,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030248723924160004,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030248723924160004,
"signal/frontier_coverage_5/centered_abs_mean": 0.17459246814250945,
"signal/frontier_coverage_5/group_std_mean": 0.22909881174564362,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031252051237970592,
"signal/frontier_ece_reward/centered_abs_mean": 0.018266384676098823,
"signal/frontier_ece_reward/group_std_mean": 0.0225957952439785,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002283298084512353,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002283298084512353,
"step": 125
},
{
"calibration/aurc": 0.32036860924247845,
"calibration/batch_distribution_entropy": 0.9040425655296254,
"calibration/buffer_distribution_entropy": 0.9658882837152956,
"calibration/confidence_entropy": 0.3965625761816952,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.026171875,
"calibration/coverage@15%": 0.05859375,
"calibration/coverage@20%": 0.123828125,
"calibration/coverage@25%": 0.226171875,
"calibration/coverage@30%": 0.46484375,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.16183175553137435,
"calibration/mean_confidence": 0.5771191877791881,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 634.0,
"completions/max_terminated_length": 412.8,
"completions/mean_length": 172.9611328125,
"completions/mean_terminated_length": 172.82837829589843,
"completions/min_length": 85.4,
"completions/min_terminated_length": 85.4,
"epoch": 0.416,
"grad_norm": 0.0024297665804624557,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 434213765.0,
"reward": 1.0161998510360717,
"reward_std": 0.09269836395978928,
"rewards/accuracy_reward": 0.56416015625,
"rewards/brier_reward": 0.7885978817939758,
"rewards/confidence_uniqueness_reward": 0.9452348351478577,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0034807201474905012,
"rewards/frontier_coverage_1": 0.1499410331249237,
"rewards/frontier_coverage_10": 0.1499410331249237,
"rewards/frontier_coverage_15": 0.1499410331249237,
"rewards/frontier_coverage_20": 0.1499410331249237,
"rewards/frontier_coverage_25": 0.1424618661403656,
"rewards/frontier_coverage_5": 0.1499410331249237,
"rewards/frontier_ece_reward": 0.012646915204823018,
"signal/accuracy_reward/centered_abs_mean": 0.114910888671875,
"signal/accuracy_reward/group_std_mean": 0.15072711706161498,
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574554443359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0574554443359375,
"signal/advantage_abs_mean": 0.07128219231963158,
"signal/advantage_pre_scale_abs_mean": 0.07128219231963158,
"signal/advantage_pre_scale_std": 0.11946070045232773,
"signal/advantage_std": 0.11946070045232773,
"signal/brier_reward/centered_abs_mean": 0.16007616817951204,
"signal/brier_reward/group_std_mean": 0.20323581397533416,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020009521022439004,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020009521022439004,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02985215187072754,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03883992582559585,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037315189838409424,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037315189838409424,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035416937433183195,
"signal/frontier_aurc_reward/group_std_mean": 0.005710090417414904,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.339631509035825e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.339631509035825e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18811068534851075,
"signal/frontier_coverage_1/group_std_mean": 0.24402026534080506,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_10/centered_abs_mean": 0.18811068534851075,
"signal/frontier_coverage_10/group_std_mean": 0.24402026534080506,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_15/centered_abs_mean": 0.18811068534851075,
"signal/frontier_coverage_15/group_std_mean": 0.24402026534080506,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_20/centered_abs_mean": 0.18811068534851075,
"signal/frontier_coverage_20/group_std_mean": 0.24402026534080506,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_25/centered_abs_mean": 0.175497430562973,
"signal/frontier_coverage_25/group_std_mean": 0.22769122421741486,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003141403943300247,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003141403943300247,
"signal/frontier_coverage_5/centered_abs_mean": 0.18811068534851075,
"signal/frontier_coverage_5/group_std_mean": 0.24402026534080506,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003367181122303009,
"signal/frontier_ece_reward/centered_abs_mean": 0.016132255643606187,
"signal/frontier_ece_reward/group_std_mean": 0.02017546221613884,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020165319554507734,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020165319554507734,
"step": 130
},
{
"calibration/aurc": 0.27492773853385566,
"calibration/batch_distribution_entropy": 0.8936948367188059,
"calibration/buffer_distribution_entropy": 0.9619736028048184,
"calibration/confidence_entropy": 0.374110594039602,
"calibration/coverage@0%": 0.014453125,
"calibration/coverage@1%": 0.014453125,
"calibration/coverage@10%": 0.11875,
"calibration/coverage@15%": 0.22734375,
"calibration/coverage@20%": 0.333203125,
"calibration/coverage@25%": 0.4421875,
"calibration/coverage@30%": 0.512109375,
"calibration/coverage@5%": 0.058984375,
"calibration/ece": 0.13765157935061254,
"calibration/mean_confidence": 0.5749263041564635,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 627.4,
"completions/max_terminated_length": 393.2,
"completions/mean_length": 172.41337890625,
"completions/mean_terminated_length": 172.28014221191407,
"completions/min_length": 88.2,
"completions/min_terminated_length": 88.2,
"epoch": 0.432,
"grad_norm": 0.014152178540825844,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 450993614.0,
"reward": 1.034668791294098,
"reward_std": 0.08032770156860351,
"rewards/accuracy_reward": 0.595703125,
"rewards/brier_reward": 0.81131010055542,
"rewards/confidence_uniqueness_reward": 0.9441461324691772,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0028154389234259724,
"rewards/frontier_coverage_1": 0.14911295846104622,
"rewards/frontier_coverage_10": 0.14911295846104622,
"rewards/frontier_coverage_15": 0.14911295846104622,
"rewards/frontier_coverage_20": 0.14911295846104622,
"rewards/frontier_coverage_25": 0.1374576583504677,
"rewards/frontier_coverage_5": 0.14911295846104622,
"rewards/frontier_ece_reward": 0.013817432709038258,
"signal/accuracy_reward/centered_abs_mean": 0.1028564453125,
"signal/accuracy_reward/group_std_mean": 0.1392101302742958,
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05142822265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05142822265625,
"signal/advantage_abs_mean": 0.06035290732979774,
"signal/advantage_pre_scale_abs_mean": 0.06035290732979774,
"signal/advantage_pre_scale_std": 0.10709730833768845,
"signal/advantage_std": 0.10709730833768845,
"signal/brier_reward/centered_abs_mean": 0.1353680819272995,
"signal/brier_reward/group_std_mean": 0.17475684881210327,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016921010240912436,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016921010240912436,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030345895141363145,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03937292844057083,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003793236892670393,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003793236892670393,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028929989319294693,
"signal/frontier_aurc_reward/group_std_mean": 0.00475033214315772,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.178467981750146e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.178467981750146e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17074471116065978,
"signal/frontier_coverage_1/group_std_mean": 0.2249886155128479,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_10/centered_abs_mean": 0.17074471116065978,
"signal/frontier_coverage_10/group_std_mean": 0.2249886155128479,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_15/centered_abs_mean": 0.17074471116065978,
"signal/frontier_coverage_15/group_std_mean": 0.2249886155128479,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_20/centered_abs_mean": 0.17074471116065978,
"signal/frontier_coverage_20/group_std_mean": 0.2249886155128479,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_25/centered_abs_mean": 0.15415639579296112,
"signal/frontier_coverage_25/group_std_mean": 0.20353280007839203,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002759399451315403,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002759399451315403,
"signal/frontier_coverage_5/centered_abs_mean": 0.17074471116065978,
"signal/frontier_coverage_5/group_std_mean": 0.2249886155128479,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030563301406800747,
"signal/frontier_ece_reward/centered_abs_mean": 0.013533397577702999,
"signal/frontier_ece_reward/group_std_mean": 0.017023883387446405,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016916746972128749,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016916746972128749,
"step": 135
},
{
"calibration/aurc": 0.28909308310100607,
"calibration/batch_distribution_entropy": 0.9344033893289716,
"calibration/buffer_distribution_entropy": 0.9556897831631528,
"calibration/confidence_entropy": 0.4277412313639977,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.05859375,
"calibration/coverage@15%": 0.141796875,
"calibration/coverage@20%": 0.204296875,
"calibration/coverage@25%": 0.253125,
"calibration/coverage@30%": 0.57734375,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1422481416663646,
"calibration/mean_confidence": 0.5711865293477547,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 609.8,
"completions/max_terminated_length": 383.2,
"completions/mean_length": 179.7435546875,
"completions/mean_terminated_length": 179.6114044189453,
"completions/min_length": 70.6,
"completions/min_terminated_length": 70.6,
"epoch": 0.448,
"grad_norm": 0.009844657965004444,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 467786988.0,
"reward": 1.0257550001144409,
"reward_std": 0.08076644837856292,
"rewards/accuracy_reward": 0.57421875,
"rewards/brier_reward": 0.8085735321044922,
"rewards/confidence_uniqueness_reward": 0.9484065532684326,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0029993959702551364,
"rewards/frontier_coverage_1": 0.16901236772537231,
"rewards/frontier_coverage_10": 0.16901236772537231,
"rewards/frontier_coverage_15": 0.16901236772537231,
"rewards/frontier_coverage_20": 0.16901236772537231,
"rewards/frontier_coverage_25": 0.16056638807058335,
"rewards/frontier_coverage_5": 0.16901236772537231,
"rewards/frontier_ece_reward": 0.011342884600162506,
"signal/accuracy_reward/centered_abs_mean": 0.09781494140625,
"signal/accuracy_reward/group_std_mean": 0.13312481343746185,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048907470703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.048907470703125,
"signal/advantage_abs_mean": 0.0605968214571476,
"signal/advantage_pre_scale_abs_mean": 0.0605968214571476,
"signal/advantage_pre_scale_std": 0.10682090073823929,
"signal/advantage_std": 0.10682090073823929,
"signal/brier_reward/centered_abs_mean": 0.1433545708656311,
"signal/brier_reward/group_std_mean": 0.1853357344865799,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01791932135820389,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01791932135820389,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0269392192363739,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03449588306248188,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033674024045467375,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033674024045467375,
"signal/format_reward/centered_abs_mean": 0.001068115234375,
"signal/format_reward/group_std_mean": 0.0013125419616699218,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005340576171875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005340576171875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002783289086073637,
"signal/frontier_aurc_reward/group_std_mean": 0.004721877304837108,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.982087411917746e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.982087411917746e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18696335852146148,
"signal/frontier_coverage_1/group_std_mean": 0.24190506637096404,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_10/centered_abs_mean": 0.18696335852146148,
"signal/frontier_coverage_10/group_std_mean": 0.24190506637096404,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_15/centered_abs_mean": 0.18696335852146148,
"signal/frontier_coverage_15/group_std_mean": 0.24190506637096404,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_20/centered_abs_mean": 0.18696335852146148,
"signal/frontier_coverage_20/group_std_mean": 0.24190506637096404,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_25/centered_abs_mean": 0.1663324326276779,
"signal/frontier_coverage_25/group_std_mean": 0.21630672812461854,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002977350587025285,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002977350587025285,
"signal/frontier_coverage_5/centered_abs_mean": 0.18696335852146148,
"signal/frontier_coverage_5/group_std_mean": 0.24190506637096404,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003346643876284361,
"signal/frontier_ece_reward/centered_abs_mean": 0.01322302669286728,
"signal/frontier_ece_reward/group_std_mean": 0.01661177948117256,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00165287833660841,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00165287833660841,
"step": 140
},
{
"calibration/aurc": 0.4461233259007119,
"calibration/batch_distribution_entropy": 0.9497364830495236,
"calibration/buffer_distribution_entropy": 0.9507200407112958,
"calibration/confidence_entropy": 0.4329203666177947,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.009375,
"calibration/coverage@20%": 0.03671875,
"calibration/coverage@25%": 0.058203125,
"calibration/coverage@30%": 0.220459271037182,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.154924640530329,
"calibration/mean_confidence": 0.491454960609382,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 425.2,
"completions/max_terminated_length": 425.2,
"completions/mean_length": 185.42412109375,
"completions/mean_terminated_length": 185.42412109375,
"completions/min_length": 89.6,
"completions/min_terminated_length": 89.6,
"epoch": 0.464,
"grad_norm": 0.001580472569912672,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 484856547.0,
"reward": 1.0002707242965698,
"reward_std": 0.07840840741991997,
"rewards/accuracy_reward": 0.52705078125,
"rewards/brier_reward": 0.7894474506378174,
"rewards/confidence_uniqueness_reward": 0.946292269229889,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.00347891659475863,
"rewards/frontier_coverage_1": 0.1787361979484558,
"rewards/frontier_coverage_10": 0.1787361979484558,
"rewards/frontier_coverage_15": 0.1787361979484558,
"rewards/frontier_coverage_20": 0.1787361979484558,
"rewards/frontier_coverage_25": 0.1631140410900116,
"rewards/frontier_coverage_5": 0.1787361979484558,
"rewards/frontier_ece_reward": 0.007778843771666289,
"signal/accuracy_reward/centered_abs_mean": 0.095733642578125,
"signal/accuracy_reward/group_std_mean": 0.12840082347393036,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0478668212890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0478668212890625,
"signal/advantage_abs_mean": 0.059501688182353976,
"signal/advantage_pre_scale_abs_mean": 0.059501688182353976,
"signal/advantage_pre_scale_std": 0.10387323051691055,
"signal/advantage_std": 0.10387323051691055,
"signal/brier_reward/centered_abs_mean": 0.13797992914915086,
"signal/brier_reward/group_std_mean": 0.17975262105464934,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017247491143643857,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017247491143643857,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02722937911748886,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03534681871533394,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034036723896861075,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034036723896861075,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025480398908257484,
"signal/frontier_aurc_reward/group_std_mean": 0.004373569739982486,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.560991146718152e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.560991146718152e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17491473257541656,
"signal/frontier_coverage_1/group_std_mean": 0.23137665688991546,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_10/centered_abs_mean": 0.17491473257541656,
"signal/frontier_coverage_10/group_std_mean": 0.23137665688991546,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_15/centered_abs_mean": 0.17491473257541656,
"signal/frontier_coverage_15/group_std_mean": 0.23137665688991546,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_20/centered_abs_mean": 0.17491473257541656,
"signal/frontier_coverage_20/group_std_mean": 0.23137665688991546,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_25/centered_abs_mean": 0.15687368512153627,
"signal/frontier_coverage_25/group_std_mean": 0.20789795517921447,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028080389834940433,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028080389834940433,
"signal/frontier_coverage_5/centered_abs_mean": 0.17491473257541656,
"signal/frontier_coverage_5/group_std_mean": 0.23137665688991546,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00313097364269197,
"signal/frontier_ece_reward/centered_abs_mean": 0.012085023522377013,
"signal/frontier_ece_reward/group_std_mean": 0.015317281149327755,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015106279402971267,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015106279402971267,
"step": 145
},
{
"calibration/aurc": 0.3023387085329794,
"calibration/batch_distribution_entropy": 0.9264211093467802,
"calibration/buffer_distribution_entropy": 0.9453633408800123,
"calibration/confidence_entropy": 0.413005030259173,
"calibration/coverage@0%": 0.015258836839530332,
"calibration/coverage@1%": 0.015258836839530332,
"calibration/coverage@10%": 0.06418251590019569,
"calibration/coverage@15%": 0.08179504036203522,
"calibration/coverage@20%": 0.28142581947162426,
"calibration/coverage@25%": 0.3337955601761252,
"calibration/coverage@30%": 0.49436766144814087,
"calibration/coverage@5%": 0.031697193003913895,
"calibration/ece": 0.15141720566899758,
"calibration/mean_confidence": 0.5136556092663461,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 732.4,
"completions/max_terminated_length": 501.2,
"completions/mean_length": 187.25947265625,
"completions/mean_terminated_length": 186.99586791992186,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.48,
"grad_norm": 0.0019054191652685404,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 501822116.0,
"reward": 1.0201605319976808,
"reward_std": 0.08553178757429122,
"rewards/accuracy_reward": 0.5736328125,
"rewards/brier_reward": 0.7931627750396728,
"rewards/confidence_uniqueness_reward": 0.9459454536437988,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0027874172665178776,
"rewards/frontier_coverage_1": 0.14067478179931642,
"rewards/frontier_coverage_10": 0.14067478179931642,
"rewards/frontier_coverage_15": 0.14067478179931642,
"rewards/frontier_coverage_20": 0.14067478179931642,
"rewards/frontier_coverage_25": 0.13163903802633287,
"rewards/frontier_coverage_5": 0.14067478179931642,
"rewards/frontier_ece_reward": 0.009641882218420505,
"signal/accuracy_reward/centered_abs_mean": 0.12015380859375,
"signal/accuracy_reward/group_std_mean": 0.15791453421115875,
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.060076904296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.060076904296875,
"signal/advantage_abs_mean": 0.06460028663277625,
"signal/advantage_pre_scale_abs_mean": 0.06460028663277625,
"signal/advantage_pre_scale_std": 0.11053272932767869,
"signal/advantage_std": 0.11053272932767869,
"signal/brier_reward/centered_abs_mean": 0.14295845627784728,
"signal/brier_reward/group_std_mean": 0.1843875139951706,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01786980703473091,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01786980703473091,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027392278984189035,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03549604080617428,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034240348730236294,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034240348730236294,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022343342658132314,
"signal/frontier_aurc_reward/group_std_mean": 0.0036009853240102528,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.999458203907125e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.999458203907125e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19157818257808684,
"signal/frontier_coverage_1/group_std_mean": 0.24868603348731994,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_10/centered_abs_mean": 0.19157818257808684,
"signal/frontier_coverage_10/group_std_mean": 0.24868603348731994,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_15/centered_abs_mean": 0.19157818257808684,
"signal/frontier_coverage_15/group_std_mean": 0.24868603348731994,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_20/centered_abs_mean": 0.19157818257808684,
"signal/frontier_coverage_20/group_std_mean": 0.24868603348731994,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_25/centered_abs_mean": 0.17257940769195557,
"signal/frontier_coverage_25/group_std_mean": 0.2242441803216934,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003089171182364225,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003089171182364225,
"signal/frontier_coverage_5/centered_abs_mean": 0.19157818257808684,
"signal/frontier_coverage_5/group_std_mean": 0.24868603348731994,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034292493481189014,
"signal/frontier_ece_reward/centered_abs_mean": 0.012200168147683144,
"signal/frontier_ece_reward/group_std_mean": 0.015492185577750206,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001525021018460393,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001525021018460393,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.46974372773433737,
"eval_calibration/batch_distribution_entropy": 0.8729489041595544,
"eval_calibration/buffer_distribution_entropy": 0.9417117821904345,
"eval_calibration/confidence_entropy": 0.4194894160323942,
"eval_calibration/coverage@0%": 0.0390625,
"eval_calibration/coverage@1%": 0.0390625,
"eval_calibration/coverage@10%": 0.0390625,
"eval_calibration/coverage@15%": 0.0390625,
"eval_calibration/coverage@20%": 0.15625,
"eval_calibration/coverage@25%": 0.1640625,
"eval_calibration/coverage@30%": 0.1953125,
"eval_calibration/coverage@5%": 0.0390625,
"eval_calibration/ece": 0.1833765625,
"eval_calibration/mean_confidence": 0.49400156249999994,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 406.25,
"eval_completions/max_terminated_length": 406.25,
"eval_completions/mean_length": 190.00639724731445,
"eval_completions/mean_terminated_length": 190.00639724731445,
"eval_completions/min_length": 105.5,
"eval_completions/min_terminated_length": 105.5,
"eval_loss": 0.0,
"eval_num_tokens": 501822116.0,
"eval_reward": 0.9429960995912552,
"eval_reward_std": 0.22927699238061905,
"eval_rewards/accuracy_reward": 0.416015625,
"eval_rewards/brier_reward": 0.7913370132446289,
"eval_rewards/confidence_uniqueness_reward": 0.894775390625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0035256121191196144,
"eval_rewards/frontier_coverage_1": 0.21800321713089943,
"eval_rewards/frontier_coverage_10": 0.21800321713089943,
"eval_rewards/frontier_coverage_15": 0.21800321713089943,
"eval_rewards/frontier_coverage_20": 0.21800321713089943,
"eval_rewards/frontier_coverage_25": 0.1946805864572525,
"eval_rewards/frontier_coverage_5": 0.21800321713089943,
"eval_rewards/frontier_ece_reward": 0.010330205783247948,
"eval_runtime": 20.2833,
"eval_samples_per_second": 24.651,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4698486328125,
"eval_signal/accuracy_reward/group_std_mean": 0.4919331818819046,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23492431640625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23492431640625,
"eval_signal/advantage_abs_mean": 0.20919283106923103,
"eval_signal/advantage_pre_scale_abs_mean": 0.20919283106923103,
"eval_signal/advantage_pre_scale_std": 0.22680530324578285,
"eval_signal/advantage_std": 0.22680530324578285,
"eval_signal/brier_reward/centered_abs_mean": 0.21989374607801437,
"eval_signal/brier_reward/group_std_mean": 0.27512865513563156,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027486718259751797,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027486718259751797,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.046142578125,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.054415556602180004,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005767822265625,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005767822265625,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0042980361031368375,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.00823443685658276,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.693484076298773e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.693484076298773e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36622023582458496,
"eval_signal/frontier_coverage_1/group_std_mean": 0.45468851178884506,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.36622023582458496,
"eval_signal/frontier_coverage_10/group_std_mean": 0.45468851178884506,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.36622023582458496,
"eval_signal/frontier_coverage_15/group_std_mean": 0.45468851178884506,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.36622023582458496,
"eval_signal/frontier_coverage_20/group_std_mean": 0.45468851178884506,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3283776342868805,
"eval_signal/frontier_coverage_25/group_std_mean": 0.40934164822101593,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0058779597748070955,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0058779597748070955,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36622023582458496,
"eval_signal/frontier_coverage_5/group_std_mean": 0.45468851178884506,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0065553419990465045,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.018566378857940435,
"eval_signal/frontier_ece_reward/group_std_mean": 0.02370068058371544,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023207973572425544,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023207973572425544,
"eval_steps_per_second": 0.197,
"step": 150
},
{
"calibration/aurc": 0.3998863695118646,
"calibration/batch_distribution_entropy": 0.9386218344736872,
"calibration/buffer_distribution_entropy": 0.9403166197438353,
"calibration/confidence_entropy": 0.4160008952255986,
"calibration/coverage@0%": 0.002734375,
"calibration/coverage@1%": 0.002734375,
"calibration/coverage@10%": 0.11640625,
"calibration/coverage@15%": 0.16171875,
"calibration/coverage@20%": 0.180859375,
"calibration/coverage@25%": 0.23671875,
"calibration/coverage@30%": 0.27578125,
"calibration/coverage@5%": 0.0515625,
"calibration/ece": 0.16494032522185803,
"calibration/mean_confidence": 0.5358418390577956,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 471.2,
"completions/max_terminated_length": 471.2,
"completions/mean_length": 191.12421875,
"completions/mean_terminated_length": 191.12421875,
"completions/min_length": 94.6,
"completions/min_terminated_length": 94.6,
"epoch": 0.496,
"grad_norm": 0.0010813616681843996,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 519087068.0,
"reward": 1.0434123754501343,
"reward_std": 0.07760989367961883,
"rewards/accuracy_reward": 0.61640625,
"rewards/brier_reward": 0.8119624257087708,
"rewards/confidence_uniqueness_reward": 0.9533378601074218,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0029467219952493905,
"rewards/frontier_coverage_1": 0.1270418345928192,
"rewards/frontier_coverage_10": 0.1270418345928192,
"rewards/frontier_coverage_15": 0.1270418345928192,
"rewards/frontier_coverage_20": 0.1270418345928192,
"rewards/frontier_coverage_25": 0.10793070495128632,
"rewards/frontier_coverage_5": 0.1270418345928192,
"rewards/frontier_ece_reward": 0.010378126800060273,
"signal/accuracy_reward/centered_abs_mean": 0.0924072265625,
"signal/accuracy_reward/group_std_mean": 0.1291539266705513,
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04620361328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04620361328125,
"signal/advantage_abs_mean": 0.05744131505489349,
"signal/advantage_pre_scale_abs_mean": 0.05744131505489349,
"signal/advantage_pre_scale_std": 0.10349351465702057,
"signal/advantage_std": 0.10349351465702057,
"signal/brier_reward/centered_abs_mean": 0.13018125295639038,
"signal/brier_reward/group_std_mean": 0.1682298392057419,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016272656619548798,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016272656619548798,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0224942684173584,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02873026542365551,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028117835521698,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028117835521698,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027974717784672977,
"signal/frontier_aurc_reward/group_std_mean": 0.004662458691745997,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0074743921868506e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0074743921868506e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1607126325368881,
"signal/frontier_coverage_1/group_std_mean": 0.2118363171815872,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_10/centered_abs_mean": 0.1607126325368881,
"signal/frontier_coverage_10/group_std_mean": 0.2118363171815872,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_15/centered_abs_mean": 0.1607126325368881,
"signal/frontier_coverage_15/group_std_mean": 0.2118363171815872,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_20/centered_abs_mean": 0.1607126325368881,
"signal/frontier_coverage_20/group_std_mean": 0.2118363171815872,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_25/centered_abs_mean": 0.145123627781868,
"signal/frontier_coverage_25/group_std_mean": 0.19127892851829528,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002597712818533182,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002597712818533182,
"signal/frontier_coverage_5/centered_abs_mean": 0.1607126325368881,
"signal/frontier_coverage_5/group_std_mean": 0.2118363171815872,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002876756014302373,
"signal/frontier_ece_reward/centered_abs_mean": 0.012259660847485065,
"signal/frontier_ece_reward/group_std_mean": 0.015376238338649272,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015324576059356331,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015324576059356331,
"step": 155
},
{
"calibration/aurc": 0.30546842444616046,
"calibration/batch_distribution_entropy": 0.9425257723813347,
"calibration/buffer_distribution_entropy": 0.9392028630333185,
"calibration/confidence_entropy": 0.43838925650671695,
"calibration/coverage@0%": 0.01953125,
"calibration/coverage@1%": 0.01953125,
"calibration/coverage@10%": 0.15625,
"calibration/coverage@15%": 0.301953125,
"calibration/coverage@20%": 0.3921875,
"calibration/coverage@25%": 0.45546875,
"calibration/coverage@30%": 0.48984375,
"calibration/coverage@5%": 0.062890625,
"calibration/ece": 0.14053552377202888,
"calibration/mean_confidence": 0.5392683375418532,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 418.6,
"completions/max_terminated_length": 418.6,
"completions/mean_length": 186.12705078125,
"completions/mean_terminated_length": 186.12705078125,
"completions/min_length": 90.6,
"completions/min_terminated_length": 90.6,
"epoch": 0.512,
"grad_norm": 0.0013270304771140218,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 536138673.0,
"reward": 1.047600269317627,
"reward_std": 0.0819821760058403,
"rewards/accuracy_reward": 0.62021484375,
"rewards/brier_reward": 0.819976532459259,
"rewards/confidence_uniqueness_reward": 0.9540580749511719,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002464716648682952,
"rewards/frontier_coverage_1": 0.13561659753322602,
"rewards/frontier_coverage_10": 0.13561659753322602,
"rewards/frontier_coverage_15": 0.13561659753322602,
"rewards/frontier_coverage_20": 0.13561659753322602,
"rewards/frontier_coverage_25": 0.122788804769516,
"rewards/frontier_coverage_5": 0.13561659753322602,
"rewards/frontier_ece_reward": 0.011576398648321629,
"signal/accuracy_reward/centered_abs_mean": 0.103265380859375,
"signal/accuracy_reward/group_std_mean": 0.13870376944541932,
"signal/accuracy_reward/group_zero_std_frac": 0.59375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0516326904296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0516326904296875,
"signal/advantage_abs_mean": 0.06201394349336624,
"signal/advantage_pre_scale_abs_mean": 0.06201394349336624,
"signal/advantage_pre_scale_std": 0.11068142652511596,
"signal/advantage_std": 0.11068142652511596,
"signal/brier_reward/centered_abs_mean": 0.12418768852949143,
"signal/brier_reward/group_std_mean": 0.16362954676151276,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015523461066186428,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015523461066186428,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021464061737060548,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027423058450222016,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026830077171325684,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026830077171325684,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023107901914045216,
"signal/frontier_aurc_reward/group_std_mean": 0.003936678450554609,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.13631434639683e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.13631434639683e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1497887223958969,
"signal/frontier_coverage_1/group_std_mean": 0.2013436108827591,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_10/centered_abs_mean": 0.1497887223958969,
"signal/frontier_coverage_10/group_std_mean": 0.2013436108827591,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_15/centered_abs_mean": 0.1497887223958969,
"signal/frontier_coverage_15/group_std_mean": 0.2013436108827591,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_20/centered_abs_mean": 0.1497887223958969,
"signal/frontier_coverage_20/group_std_mean": 0.2013436108827591,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_25/centered_abs_mean": 0.13205797374248504,
"signal/frontier_coverage_25/group_std_mean": 0.17783505022525786,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00236383774317801,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00236383774317801,
"signal/frontier_coverage_5/centered_abs_mean": 0.1497887223958969,
"signal/frontier_coverage_5/group_std_mean": 0.2013436108827591,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002681217947974801,
"signal/frontier_ece_reward/centered_abs_mean": 0.011835797131061554,
"signal/frontier_ece_reward/group_std_mean": 0.014990520663559437,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014794746413826943,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014794746413826943,
"step": 160
},
{
"calibration/aurc": 0.21724688671067058,
"calibration/batch_distribution_entropy": 0.9290459239453405,
"calibration/buffer_distribution_entropy": 0.9403927660747857,
"calibration/confidence_entropy": 0.40619159910496216,
"calibration/coverage@0%": 0.01796875,
"calibration/coverage@1%": 0.01796875,
"calibration/coverage@10%": 0.2698332008317025,
"calibration/coverage@15%": 0.37193003913894324,
"calibration/coverage@20%": 0.47208674779843446,
"calibration/coverage@25%": 0.6163145181017613,
"calibration/coverage@30%": 0.7265861974070449,
"calibration/coverage@5%": 0.12551216976516635,
"calibration/ece": 0.10725554418840492,
"calibration/mean_confidence": 0.5539072649878911,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 876.8,
"completions/max_terminated_length": 444.4,
"completions/mean_length": 181.95234375,
"completions/mean_terminated_length": 181.55501403808594,
"completions/min_length": 88.4,
"completions/min_terminated_length": 88.4,
"epoch": 0.528,
"grad_norm": 0.0014982522698119283,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 553031401.0,
"reward": 1.035895085334778,
"reward_std": 0.07947989255189895,
"rewards/accuracy_reward": 0.59501953125,
"rewards/brier_reward": 0.814744234085083,
"rewards/confidence_uniqueness_reward": 0.9488541841506958,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.002320343186147511,
"rewards/frontier_coverage_1": 0.15851781517267227,
"rewards/frontier_coverage_10": 0.15851781517267227,
"rewards/frontier_coverage_15": 0.15851781517267227,
"rewards/frontier_coverage_20": 0.15851781517267227,
"rewards/frontier_coverage_25": 0.14161890745162964,
"rewards/frontier_coverage_5": 0.15851781517267227,
"rewards/frontier_ece_reward": 0.011600286141037941,
"signal/accuracy_reward/centered_abs_mean": 0.106195068359375,
"signal/accuracy_reward/group_std_mean": 0.13894531279802322,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0530975341796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0530975341796875,
"signal/advantage_abs_mean": 0.060419239848852155,
"signal/advantage_pre_scale_abs_mean": 0.060419239848852155,
"signal/advantage_pre_scale_std": 0.10889140963554382,
"signal/advantage_std": 0.10889140963554382,
"signal/brier_reward/centered_abs_mean": 0.12778010070323945,
"signal/brier_reward/group_std_mean": 0.16365299820899964,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01597251258790493,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01597251258790493,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025607554242014885,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03395786285400391,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032009442802518606,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032009442802518606,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002263115392997861,
"signal/frontier_aurc_reward/group_std_mean": 0.0036556614562869073,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.050976349390112e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.050976349390112e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16458451747894287,
"signal/frontier_coverage_1/group_std_mean": 0.21477258801460267,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_10/centered_abs_mean": 0.16458451747894287,
"signal/frontier_coverage_10/group_std_mean": 0.21477258801460267,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_15/centered_abs_mean": 0.16458451747894287,
"signal/frontier_coverage_15/group_std_mean": 0.21477258801460267,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_20/centered_abs_mean": 0.16458451747894287,
"signal/frontier_coverage_20/group_std_mean": 0.21477258801460267,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_25/centered_abs_mean": 0.14212436079978943,
"signal/frontier_coverage_25/group_std_mean": 0.18566452860832214,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025440258905291557,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025440258905291557,
"signal/frontier_coverage_5/centered_abs_mean": 0.16458451747894287,
"signal/frontier_coverage_5/group_std_mean": 0.21477258801460267,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029460627119988203,
"signal/frontier_ece_reward/centered_abs_mean": 0.010803556628525257,
"signal/frontier_ece_reward/group_std_mean": 0.013579228706657887,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001350444578565657,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001350444578565657,
"step": 165
},
{
"calibration/aurc": 0.26676921280074456,
"calibration/batch_distribution_entropy": 0.9071265737076237,
"calibration/buffer_distribution_entropy": 0.9395798766963088,
"calibration/confidence_entropy": 0.40087509719588776,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.10441153987279843,
"calibration/coverage@15%": 0.21305497798434442,
"calibration/coverage@20%": 0.30528299290606653,
"calibration/coverage@25%": 0.49401372920743636,
"calibration/coverage@30%": 0.7015235139432485,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.10255522742341461,
"calibration/mean_confidence": 0.5823576404726291,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 631.0,
"completions/max_terminated_length": 412.0,
"completions/mean_length": 179.7796875,
"completions/mean_terminated_length": 179.5145263671875,
"completions/min_length": 84.0,
"completions/min_terminated_length": 84.0,
"epoch": 0.544,
"grad_norm": 0.0018303662072867155,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 570035929.0,
"reward": 1.0470038890838622,
"reward_std": 0.08949521332979202,
"rewards/accuracy_reward": 0.63212890625,
"rewards/brier_reward": 0.8009482145309448,
"rewards/confidence_uniqueness_reward": 0.9542343854904175,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0027931962627917527,
"rewards/frontier_coverage_1": 0.10076590478420258,
"rewards/frontier_coverage_10": 0.10076590478420258,
"rewards/frontier_coverage_15": 0.10076590478420258,
"rewards/frontier_coverage_20": 0.10076590478420258,
"rewards/frontier_coverage_25": 0.08373739868402481,
"rewards/frontier_coverage_5": 0.10076590478420258,
"rewards/frontier_ece_reward": 0.00937446840107441,
"signal/accuracy_reward/centered_abs_mean": 0.126141357421875,
"signal/accuracy_reward/group_std_mean": 0.16540803015232086,
"signal/accuracy_reward/group_zero_std_frac": 0.528125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0630706787109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0630706787109375,
"signal/advantage_abs_mean": 0.06790957748889923,
"signal/advantage_pre_scale_abs_mean": 0.06790957748889923,
"signal/advantage_pre_scale_std": 0.11734311580657959,
"signal/advantage_std": 0.11734311580657959,
"signal/brier_reward/centered_abs_mean": 0.13875785171985627,
"signal/brier_reward/group_std_mean": 0.1772547960281372,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017344731464982034,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017344731464982034,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02129780054092407,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02833399027585983,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002662225067615509,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002662225067615509,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002696292009204626,
"signal/frontier_aurc_reward/group_std_mean": 0.004545783344656229,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8263624921673906e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8263624921673906e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17123860418796538,
"signal/frontier_coverage_1/group_std_mean": 0.22258543372154235,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_10/centered_abs_mean": 0.17123860418796538,
"signal/frontier_coverage_10/group_std_mean": 0.22258543372154235,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_15/centered_abs_mean": 0.17123860418796538,
"signal/frontier_coverage_15/group_std_mean": 0.22258543372154235,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_20/centered_abs_mean": 0.17123860418796538,
"signal/frontier_coverage_20/group_std_mean": 0.22258543372154235,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_25/centered_abs_mean": 0.13877653181552888,
"signal/frontier_coverage_25/group_std_mean": 0.18102459311485292,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002484099706634879,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002484099706634879,
"signal/frontier_coverage_5/centered_abs_mean": 0.17123860418796538,
"signal/frontier_coverage_5/group_std_mean": 0.22258543372154235,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030651709996163843,
"signal/frontier_ece_reward/centered_abs_mean": 0.011479491926729679,
"signal/frontier_ece_reward/group_std_mean": 0.014317681267857551,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014349364908412098,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014349364908412098,
"step": 170
},
{
"calibration/aurc": 0.25630293714761593,
"calibration/batch_distribution_entropy": 0.9494929939257506,
"calibration/buffer_distribution_entropy": 0.9382286137261033,
"calibration/confidence_entropy": 0.4323171410988912,
"calibration/coverage@0%": 0.06328125,
"calibration/coverage@1%": 0.0875,
"calibration/coverage@10%": 0.21685267857142856,
"calibration/coverage@15%": 0.2813272076810176,
"calibration/coverage@20%": 0.40833460738747557,
"calibration/coverage@25%": 0.48687851638943247,
"calibration/coverage@30%": 0.5799000122309198,
"calibration/coverage@5%": 0.18714377446183952,
"calibration/ece": 0.11609540804760625,
"calibration/mean_confidence": 0.5559859733345548,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 628.0,
"completions/max_terminated_length": 406.6,
"completions/mean_length": 174.68447265625,
"completions/mean_terminated_length": 174.41849670410156,
"completions/min_length": 85.6,
"completions/min_terminated_length": 85.6,
"epoch": 0.56,
"grad_norm": 0.0008989165653474629,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 586646106.0,
"reward": 1.0321044683456422,
"reward_std": 0.07522638440132141,
"rewards/accuracy_reward": 0.58759765625,
"rewards/brier_reward": 0.8196055650711059,
"rewards/confidence_uniqueness_reward": 0.9552822113037109,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002765678521245718,
"rewards/frontier_coverage_1": 0.14798882305622102,
"rewards/frontier_coverage_10": 0.14798882305622102,
"rewards/frontier_coverage_15": 0.14798882305622102,
"rewards/frontier_coverage_20": 0.14798882305622102,
"rewards/frontier_coverage_25": 0.1166765883564949,
"rewards/frontier_coverage_5": 0.14798882305622102,
"rewards/frontier_ece_reward": 0.01006685383617878,
"signal/accuracy_reward/centered_abs_mean": 0.087664794921875,
"signal/accuracy_reward/group_std_mean": 0.12393931746482849,
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0438323974609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0438323974609375,
"signal/advantage_abs_mean": 0.0555981308221817,
"signal/advantage_pre_scale_abs_mean": 0.0555981308221817,
"signal/advantage_pre_scale_std": 0.10245826691389084,
"signal/advantage_std": 0.10245826691389084,
"signal/brier_reward/centered_abs_mean": 0.12557310312986375,
"signal/brier_reward/group_std_mean": 0.16274870932102203,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01569663789123297,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01569663789123297,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0206695556640625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027292505279183386,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025836944580078123,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025836944580078123,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027116264682263136,
"signal/frontier_aurc_reward/group_std_mean": 0.004758490296080708,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.853811406064778e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.853811406064778e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15379790365695953,
"signal/frontier_coverage_1/group_std_mean": 0.20061783492565155,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_10/centered_abs_mean": 0.15379790365695953,
"signal/frontier_coverage_10/group_std_mean": 0.20061783492565155,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_15/centered_abs_mean": 0.15379790365695953,
"signal/frontier_coverage_15/group_std_mean": 0.20061783492565155,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_20/centered_abs_mean": 0.15379790365695953,
"signal/frontier_coverage_20/group_std_mean": 0.20061783492565155,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_25/centered_abs_mean": 0.11712785661220551,
"signal/frontier_coverage_25/group_std_mean": 0.1535368263721466,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020965886767953636,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020965886767953636,
"signal/frontier_coverage_5/centered_abs_mean": 0.15379790365695953,
"signal/frontier_coverage_5/group_std_mean": 0.20061783492565155,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027529822662472727,
"signal/frontier_ece_reward/centered_abs_mean": 0.01002585757523775,
"signal/frontier_ece_reward/group_std_mean": 0.012720918469130992,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012532321969047188,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012532321969047188,
"step": 175
},
{
"calibration/aurc": 0.3079945286543727,
"calibration/batch_distribution_entropy": 0.9393406726013804,
"calibration/buffer_distribution_entropy": 0.9394180622540608,
"calibration/confidence_entropy": 0.4314125793441579,
"calibration/coverage@0%": 0.009375,
"calibration/coverage@1%": 0.009375,
"calibration/coverage@10%": 0.098046875,
"calibration/coverage@15%": 0.155078125,
"calibration/coverage@20%": 0.25546875,
"calibration/coverage@25%": 0.36875,
"calibration/coverage@30%": 0.491796875,
"calibration/coverage@5%": 0.047265625,
"calibration/ece": 0.10765465442729547,
"calibration/mean_confidence": 0.5416941542047502,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 411.2,
"completions/max_terminated_length": 411.2,
"completions/mean_length": 171.85869140625,
"completions/mean_terminated_length": 171.85869140625,
"completions/min_length": 84.8,
"completions/min_terminated_length": 84.8,
"epoch": 0.576,
"grad_norm": 0.004144130740314722,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 603592563.0,
"reward": 1.0301400423049927,
"reward_std": 0.06984256058931351,
"rewards/accuracy_reward": 0.5873046875,
"rewards/brier_reward": 0.8139339208602905,
"rewards/confidence_uniqueness_reward": 0.9532180786132812,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0029315354768186808,
"rewards/frontier_coverage_1": 0.14102224856615067,
"rewards/frontier_coverage_10": 0.14102224856615067,
"rewards/frontier_coverage_15": 0.14102224856615067,
"rewards/frontier_coverage_20": 0.14102224856615067,
"rewards/frontier_coverage_25": 0.11173355653882026,
"rewards/frontier_coverage_5": 0.14102224856615067,
"rewards/frontier_ece_reward": 0.008197224885225295,
"signal/accuracy_reward/centered_abs_mean": 0.0781005859375,
"signal/accuracy_reward/group_std_mean": 0.11179777681827545,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03905029296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03905029296875,
"signal/advantage_abs_mean": 0.05057525113224983,
"signal/advantage_pre_scale_abs_mean": 0.05057525113224983,
"signal/advantage_pre_scale_std": 0.09479147344827651,
"signal/advantage_std": 0.09479147344827651,
"signal/brier_reward/centered_abs_mean": 0.12227167785167695,
"signal/brier_reward/group_std_mean": 0.15898571908473969,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015283959731459618,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015283959731459618,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021122956275939943,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02684759609401226,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002640369534492493,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002640369534492493,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023652775678783657,
"signal/frontier_aurc_reward/group_std_mean": 0.003888764465227723,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2338467756053434e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2338467756053434e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15091627687215806,
"signal/frontier_coverage_1/group_std_mean": 0.19856328666210174,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_10/centered_abs_mean": 0.15091627687215806,
"signal/frontier_coverage_10/group_std_mean": 0.19856328666210174,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_15/centered_abs_mean": 0.15091627687215806,
"signal/frontier_coverage_15/group_std_mean": 0.19856328666210174,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_20/centered_abs_mean": 0.15091627687215806,
"signal/frontier_coverage_20/group_std_mean": 0.19856328666210174,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_25/centered_abs_mean": 0.11474166065454483,
"signal/frontier_coverage_25/group_std_mean": 0.15160171389579774,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020538756158202886,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020538756158202886,
"signal/frontier_coverage_5/centered_abs_mean": 0.15091627687215806,
"signal/frontier_coverage_5/group_std_mean": 0.19856328666210174,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002701401337981224,
"signal/frontier_ece_reward/centered_abs_mean": 0.009535189159214497,
"signal/frontier_ece_reward/group_std_mean": 0.012084404565393924,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011918986449018121,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011918986449018121,
"step": 180
},
{
"calibration/aurc": 0.30330525952000287,
"calibration/batch_distribution_entropy": 0.9548014004939069,
"calibration/buffer_distribution_entropy": 0.9393214771578714,
"calibration/confidence_entropy": 0.4342403289114817,
"calibration/coverage@0%": 0.032821673189823874,
"calibration/coverage@1%": 0.032821673189823874,
"calibration/coverage@10%": 0.15863961594911938,
"calibration/coverage@15%": 0.29188937133072407,
"calibration/coverage@20%": 0.40682943982387476,
"calibration/coverage@25%": 0.5268101761252446,
"calibration/coverage@30%": 0.5983304794520548,
"calibration/coverage@5%": 0.06719667318982388,
"calibration/ece": 0.11588005294478321,
"calibration/mean_confidence": 0.5073022957419638,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 484.8,
"completions/max_terminated_length": 484.8,
"completions/mean_length": 170.01044921875,
"completions/mean_terminated_length": 170.01044921875,
"completions/min_length": 85.8,
"completions/min_terminated_length": 85.8,
"epoch": 0.592,
"grad_norm": 0.0013855872675776482,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 620501182.0,
"reward": 1.021399199962616,
"reward_std": 0.07876999825239181,
"rewards/accuracy_reward": 0.5703125,
"rewards/brier_reward": 0.806274163722992,
"rewards/confidence_uniqueness_reward": 0.9500226140022278,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0024875002447515724,
"rewards/frontier_coverage_1": 0.15400602370500566,
"rewards/frontier_coverage_10": 0.15400602370500566,
"rewards/frontier_coverage_15": 0.15400602370500566,
"rewards/frontier_coverage_20": 0.15400602370500566,
"rewards/frontier_coverage_25": 0.11699056923389435,
"rewards/frontier_coverage_5": 0.15400602370500566,
"rewards/frontier_ece_reward": 0.008153815101832152,
"signal/accuracy_reward/centered_abs_mean": 0.10953369140625,
"signal/accuracy_reward/group_std_mean": 0.14035410881042482,
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054766845703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.054766845703125,
"signal/advantage_abs_mean": 0.06093080118298531,
"signal/advantage_pre_scale_abs_mean": 0.06093080118298531,
"signal/advantage_pre_scale_std": 0.10702161937952041,
"signal/advantage_std": 0.10702161937952041,
"signal/brier_reward/centered_abs_mean": 0.12534408420324325,
"signal/brier_reward/group_std_mean": 0.1609453946352005,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015668010525405406,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015668010525405406,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023938726261258127,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030889422819018363,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002992340782657266,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002992340782657266,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020968030439689755,
"signal/frontier_aurc_reward/group_std_mean": 0.003513322817161679,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7532773421844465e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7532773421844465e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17061007618904114,
"signal/frontier_coverage_1/group_std_mean": 0.2188116878271103,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_10/centered_abs_mean": 0.17061007618904114,
"signal/frontier_coverage_10/group_std_mean": 0.2188116878271103,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_15/centered_abs_mean": 0.17061007618904114,
"signal/frontier_coverage_15/group_std_mean": 0.2188116878271103,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_20/centered_abs_mean": 0.17061007618904114,
"signal/frontier_coverage_20/group_std_mean": 0.2188116878271103,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_25/centered_abs_mean": 0.1261043816804886,
"signal/frontier_coverage_25/group_std_mean": 0.16292393803596497,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00225726836360991,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00225726836360991,
"signal/frontier_coverage_5/centered_abs_mean": 0.17061007618904114,
"signal/frontier_coverage_5/group_std_mean": 0.2188116878271103,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030539202969521286,
"signal/frontier_ece_reward/centered_abs_mean": 0.008867009729146957,
"signal/frontier_ece_reward/group_std_mean": 0.011277035437524318,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011083762161433696,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011083762161433696,
"step": 185
},
{
"calibration/aurc": 0.23239763422790666,
"calibration/batch_distribution_entropy": 0.8995749973819773,
"calibration/buffer_distribution_entropy": 0.9392606380989322,
"calibration/confidence_entropy": 0.3906407868577815,
"calibration/coverage@0%": 0.055108702299412914,
"calibration/coverage@1%": 0.055108702299412914,
"calibration/coverage@10%": 0.24500902030332677,
"calibration/coverage@15%": 0.3739374388454012,
"calibration/coverage@20%": 0.5095011313600782,
"calibration/coverage@25%": 0.5989680161448141,
"calibration/coverage@30%": 0.6884356653620352,
"calibration/coverage@5%": 0.1422570633561644,
"calibration/ece": 0.10831119079106703,
"calibration/mean_confidence": 0.46993740997275896,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 846.0,
"completions/max_terminated_length": 387.8,
"completions/mean_length": 171.85595703125,
"completions/mean_terminated_length": 171.589794921875,
"completions/min_length": 83.4,
"completions/min_terminated_length": 83.4,
"epoch": 0.608,
"grad_norm": 0.0008504785946570337,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 637260475.0,
"reward": 1.0233449697494508,
"reward_std": 0.06167818456888199,
"rewards/accuracy_reward": 0.56572265625,
"rewards/brier_reward": 0.8286043047904968,
"rewards/confidence_uniqueness_reward": 0.9418840408325195,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0020069938618689775,
"rewards/frontier_coverage_1": 0.17736267149448395,
"rewards/frontier_coverage_10": 0.17736267149448395,
"rewards/frontier_coverage_15": 0.17736267149448395,
"rewards/frontier_coverage_20": 0.17532597184181214,
"rewards/frontier_coverage_25": 0.13192782700061798,
"rewards/frontier_coverage_5": 0.17736267149448395,
"rewards/frontier_ece_reward": 0.009247677959501743,
"signal/accuracy_reward/centered_abs_mean": 0.084332275390625,
"signal/accuracy_reward/group_std_mean": 0.11399843543767929,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421661376953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0421661376953125,
"signal/advantage_abs_mean": 0.04536699652671814,
"signal/advantage_pre_scale_abs_mean": 0.04536699652671814,
"signal/advantage_pre_scale_std": 0.08548016101121902,
"signal/advantage_std": 0.08548016101121902,
"signal/brier_reward/centered_abs_mean": 0.11550195217132568,
"signal/brier_reward/group_std_mean": 0.15080228447914124,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01443774402141571,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01443774402141571,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02914600744843483,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03730083778500557,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036432509310543536,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036432509310543536,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016248196363449097,
"signal/frontier_aurc_reward/group_std_mean": 0.0026754786260426043,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.908426904468797e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.908426904468797e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1710997462272644,
"signal/frontier_coverage_1/group_std_mean": 0.2212434083223343,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030626854859292507,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030626854859292507,
"signal/frontier_coverage_10/centered_abs_mean": 0.1710997462272644,
"signal/frontier_coverage_10/group_std_mean": 0.2212434083223343,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030626854859292507,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030626854859292507,
"signal/frontier_coverage_15/centered_abs_mean": 0.1710997462272644,
"signal/frontier_coverage_15/group_std_mean": 0.2212434083223343,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030626854859292507,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030626854859292507,
"signal/frontier_coverage_20/centered_abs_mean": 0.1688907653093338,
"signal/frontier_coverage_20/group_std_mean": 0.2184792071580887,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030231445096433164,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030231445096433164,
"signal/frontier_coverage_25/centered_abs_mean": 0.11933436542749405,
"signal/frontier_coverage_25/group_std_mean": 0.15433123409748079,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021360850892961024,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021360850892961024,
"signal/frontier_coverage_5/centered_abs_mean": 0.1710997462272644,
"signal/frontier_coverage_5/group_std_mean": 0.2212434083223343,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030626854859292507,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030626854859292507,
"signal/frontier_ece_reward/centered_abs_mean": 0.008279498293995857,
"signal/frontier_ece_reward/group_std_mean": 0.010457862541079522,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010349372867494821,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010349372867494821,
"step": 190
},
{
"calibration/aurc": 0.28448200475198304,
"calibration/batch_distribution_entropy": 0.9490308362782736,
"calibration/buffer_distribution_entropy": 0.9402397748087887,
"calibration/confidence_entropy": 0.43260784106219513,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.1171875,
"calibration/coverage@15%": 0.230859375,
"calibration/coverage@20%": 0.39609375,
"calibration/coverage@25%": 0.501953125,
"calibration/coverage@30%": 0.572265625,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.12011852848050042,
"calibration/mean_confidence": 0.4942197901707141,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 426.6,
"completions/max_terminated_length": 426.6,
"completions/mean_length": 173.60224609375,
"completions/mean_terminated_length": 173.60224609375,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.624,
"grad_norm": 0.0009778736857697368,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 654382066.0,
"reward": 1.0262583494186401,
"reward_std": 0.07113818228244781,
"rewards/accuracy_reward": 0.57607421875,
"rewards/brier_reward": 0.8143329381942749,
"rewards/confidence_uniqueness_reward": 0.95128173828125,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002561471750959754,
"rewards/frontier_coverage_1": 0.1634742349386215,
"rewards/frontier_coverage_10": 0.1634742349386215,
"rewards/frontier_coverage_15": 0.1634742349386215,
"rewards/frontier_coverage_20": 0.15868508964776992,
"rewards/frontier_coverage_25": 0.11462056636810303,
"rewards/frontier_coverage_5": 0.1634742349386215,
"rewards/frontier_ece_reward": 0.007746654096990824,
"signal/accuracy_reward/centered_abs_mean": 0.092962646484375,
"signal/accuracy_reward/group_std_mean": 0.124091537296772,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0464813232421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0464813232421875,
"signal/advantage_abs_mean": 0.054235681891441345,
"signal/advantage_pre_scale_abs_mean": 0.054235681891441345,
"signal/advantage_pre_scale_std": 0.09768829345703126,
"signal/advantage_std": 0.09768829345703126,
"signal/brier_reward/centered_abs_mean": 0.12480789422988892,
"signal/brier_reward/group_std_mean": 0.16061947047710418,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015600986778736115,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015600986778736115,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02238917350769043,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02832588031888008,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027986466884613037,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027986466884613037,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024256373289972544,
"signal/frontier_aurc_reward/group_std_mean": 0.004125529807060957,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3418908171588554e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3418908171588554e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1706692099571228,
"signal/frontier_coverage_1/group_std_mean": 0.22160598039627075,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030549786519259215,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030549786519259215,
"signal/frontier_coverage_10/centered_abs_mean": 0.1706692099571228,
"signal/frontier_coverage_10/group_std_mean": 0.22160598039627075,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030549786519259215,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030549786519259215,
"signal/frontier_coverage_15/centered_abs_mean": 0.1706692099571228,
"signal/frontier_coverage_15/group_std_mean": 0.22160598039627075,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030549786519259215,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030549786519259215,
"signal/frontier_coverage_20/centered_abs_mean": 0.163962659239769,
"signal/frontier_coverage_20/group_std_mean": 0.21292484402656556,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029349314980208875,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029349314980208875,
"signal/frontier_coverage_25/centered_abs_mean": 0.11128398329019547,
"signal/frontier_coverage_25/group_std_mean": 0.14506538808345795,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001991983223706484,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001991983223706484,
"signal/frontier_coverage_5/centered_abs_mean": 0.1706692099571228,
"signal/frontier_coverage_5/group_std_mean": 0.22160598039627075,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030549786519259215,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030549786519259215,
"signal/frontier_ece_reward/centered_abs_mean": 0.007852244190871716,
"signal/frontier_ece_reward/group_std_mean": 0.009968752972781658,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009815305238589644,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009815305238589644,
"step": 195
},
{
"calibration/aurc": 0.2749330603883652,
"calibration/batch_distribution_entropy": 0.9157628654883949,
"calibration/buffer_distribution_entropy": 0.9417889462204929,
"calibration/confidence_entropy": 0.415460098321436,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.28203125,
"calibration/coverage@15%": 0.34375,
"calibration/coverage@20%": 0.41796875,
"calibration/coverage@25%": 0.528125,
"calibration/coverage@30%": 0.599609375,
"calibration/coverage@5%": 0.051953125,
"calibration/ece": 0.15501200980610025,
"calibration/mean_confidence": 0.5856844897140661,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 787.0,
"completions/max_terminated_length": 591.4,
"completions/mean_length": 174.65244140625,
"completions/mean_terminated_length": 174.38660888671876,
"completions/min_length": 82.6,
"completions/min_terminated_length": 82.6,
"epoch": 0.64,
"grad_norm": 0.0010942368535324931,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 671513195.0,
"reward": 1.0432765126228332,
"reward_std": 0.0722689650952816,
"rewards/accuracy_reward": 0.618359375,
"rewards/brier_reward": 0.8138016819953918,
"rewards/confidence_uniqueness_reward": 0.9505192637443542,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002926664659753442,
"rewards/frontier_coverage_1": 0.1259578838944435,
"rewards/frontier_coverage_10": 0.1259578838944435,
"rewards/frontier_coverage_15": 0.1259578838944435,
"rewards/frontier_coverage_20": 0.11951190680265426,
"rewards/frontier_coverage_25": 0.08500352278351783,
"rewards/frontier_coverage_5": 0.1259578838944435,
"rewards/frontier_ece_reward": 0.008609351143240929,
"signal/accuracy_reward/centered_abs_mean": 0.09111328125,
"signal/accuracy_reward/group_std_mean": 0.11743369847536086,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045556640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045556640625,
"signal/advantage_abs_mean": 0.05580408796668053,
"signal/advantage_pre_scale_abs_mean": 0.05580408796668053,
"signal/advantage_pre_scale_std": 0.10402074754238129,
"signal/advantage_std": 0.10402074754238129,
"signal/brier_reward/centered_abs_mean": 0.12034919857978821,
"signal/brier_reward/group_std_mean": 0.15445152223110198,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015043649822473526,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015043649822473526,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02375582978129387,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030650369822978973,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029694787226617336,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029694787226617336,
"signal/format_reward/centered_abs_mean": 0.000555419921875,
"signal/format_reward/group_std_mean": 0.0013209730386734009,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00334425400942564,
"signal/frontier_aurc_reward/group_std_mean": 0.005675982683897018,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.98621423705481e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.98621423705481e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13868267834186554,
"signal/frontier_coverage_1/group_std_mean": 0.1825660526752472,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024824199732393025,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024824199732393025,
"signal/frontier_coverage_10/centered_abs_mean": 0.13868267834186554,
"signal/frontier_coverage_10/group_std_mean": 0.1825660526752472,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024824199732393025,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024824199732393025,
"signal/frontier_coverage_15/centered_abs_mean": 0.13868267834186554,
"signal/frontier_coverage_15/group_std_mean": 0.1825660526752472,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024824199732393025,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024824199732393025,
"signal/frontier_coverage_20/centered_abs_mean": 0.13057875782251357,
"signal/frontier_coverage_20/group_std_mean": 0.17225528359413148,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002337359730154276,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002337359730154276,
"signal/frontier_coverage_25/centered_abs_mean": 0.08511566817760467,
"signal/frontier_coverage_25/group_std_mean": 0.11278624832630157,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015235703671351076,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015235703671351076,
"signal/frontier_coverage_5/centered_abs_mean": 0.13868267834186554,
"signal/frontier_coverage_5/group_std_mean": 0.1825660526752472,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024824199732393025,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024824199732393025,
"signal/frontier_ece_reward/centered_abs_mean": 0.008099580183625221,
"signal/frontier_ece_reward/group_std_mean": 0.010334640741348267,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010124475229531527,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010124475229531527,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.4334341680670255,
"eval_calibration/batch_distribution_entropy": 0.8227332130727718,
"eval_calibration/buffer_distribution_entropy": 0.9423817091169264,
"eval_calibration/confidence_entropy": 0.38854819135867324,
"eval_calibration/coverage@0%": 0.0390625,
"eval_calibration/coverage@1%": 0.0390625,
"eval_calibration/coverage@10%": 0.1328125,
"eval_calibration/coverage@15%": 0.15625,
"eval_calibration/coverage@20%": 0.171875,
"eval_calibration/coverage@25%": 0.2421875,
"eval_calibration/coverage@30%": 0.4140625,
"eval_calibration/coverage@5%": 0.0390625,
"eval_calibration/ece": 0.225390625,
"eval_calibration/mean_confidence": 0.547578125,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 309.75,
"eval_completions/max_terminated_length": 309.75,
"eval_completions/mean_length": 173.53104782104492,
"eval_completions/mean_terminated_length": 173.53104782104492,
"eval_completions/min_length": 98.5,
"eval_completions/min_terminated_length": 98.5,
"eval_loss": 0.0,
"eval_num_tokens": 671513195.0,
"eval_reward": 0.9446232914924622,
"eval_reward_std": 0.2354012466967106,
"eval_rewards/accuracy_reward": 0.41796875,
"eval_rewards/brier_reward": 0.7981462776660919,
"eval_rewards/confidence_uniqueness_reward": 0.8916015625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.004759653122164309,
"eval_rewards/frontier_coverage_1": 0.23640722408890724,
"eval_rewards/frontier_coverage_10": 0.23640722408890724,
"eval_rewards/frontier_coverage_15": 0.23640722408890724,
"eval_rewards/frontier_coverage_20": 0.2243974544107914,
"eval_rewards/frontier_coverage_25": 0.14654707163572311,
"eval_rewards/frontier_coverage_5": 0.23640722408890724,
"eval_rewards/frontier_ece_reward": 0.007511715171858668,
"eval_runtime": 17.5742,
"eval_samples_per_second": 28.451,
"eval_signal/accuracy_reward/centered_abs_mean": 0.469970703125,
"eval_signal/accuracy_reward/group_std_mean": 0.4919528365135193,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2349853515625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2349853515625,
"eval_signal/advantage_abs_mean": 0.2164347991347313,
"eval_signal/advantage_pre_scale_abs_mean": 0.2164347991347313,
"eval_signal/advantage_pre_scale_std": 0.23300310224294662,
"eval_signal/advantage_std": 0.23300310224294662,
"eval_signal/brier_reward/centered_abs_mean": 0.2241257205605507,
"eval_signal/brier_reward/group_std_mean": 0.2780345007777214,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028015715070068836,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.028015715070068836,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.050201416015625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06067673675715923,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006275177001953125,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006275177001953125,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0064848861657083035,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.013537641265429556,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00011607945270952769,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00011607945270952769,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3593551740050316,
"eval_signal/frontier_coverage_1/group_std_mean": 0.43338172882795334,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006432457361370325,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006432457361370325,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3593551740050316,
"eval_signal/frontier_coverage_10/group_std_mean": 0.43338172882795334,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006432457361370325,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006432457361370325,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3593551740050316,
"eval_signal/frontier_coverage_15/group_std_mean": 0.43338172882795334,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006432457361370325,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006432457361370325,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.34003832191228867,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4100157469511032,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006086685578338802,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006086685578338802,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.21234332025051117,
"eval_signal/frontier_coverage_25/group_std_mean": 0.2588745690882206,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003800945356488228,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003800945356488228,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3593551740050316,
"eval_signal/frontier_coverage_5/group_std_mean": 0.43338172882795334,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006432457361370325,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006432457361370325,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.012599717359989882,
"eval_signal/frontier_ece_reward/group_std_mean": 0.01608213922008872,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015749646699987352,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015749646699987352,
"eval_steps_per_second": 0.228,
"step": 200
},
{
"calibration/aurc": 0.4232796304790516,
"calibration/batch_distribution_entropy": 0.9313572623800672,
"calibration/buffer_distribution_entropy": 0.9428452319896881,
"calibration/confidence_entropy": 0.4383607192180536,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.013671875,
"calibration/coverage@20%": 0.0234375,
"calibration/coverage@25%": 0.094140625,
"calibration/coverage@30%": 0.26796875,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1789134632389641,
"calibration/mean_confidence": 0.563563677522916,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 416.4,
"completions/max_terminated_length": 416.4,
"completions/mean_length": 172.06005859375,
"completions/mean_terminated_length": 172.06005859375,
"completions/min_length": 88.4,
"completions/min_terminated_length": 88.4,
"epoch": 0.656,
"grad_norm": 0.0009223693050444126,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 688131634.0,
"reward": 1.0271916270256043,
"reward_std": 0.07614715248346329,
"rewards/accuracy_reward": 0.58486328125,
"rewards/brier_reward": 0.8058655500411988,
"rewards/confidence_uniqueness_reward": 0.9529876708984375,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.004022358637303114,
"rewards/frontier_coverage_1": 0.14106732606887817,
"rewards/frontier_coverage_10": 0.14106732606887817,
"rewards/frontier_coverage_15": 0.14106732606887817,
"rewards/frontier_coverage_20": 0.136381658911705,
"rewards/frontier_coverage_25": 0.08988674730062485,
"rewards/frontier_coverage_5": 0.14106732606887817,
"rewards/frontier_ece_reward": 0.006597818806767464,
"signal/accuracy_reward/centered_abs_mean": 0.095013427734375,
"signal/accuracy_reward/group_std_mean": 0.12622675597667693,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0475067138671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0475067138671875,
"signal/advantage_abs_mean": 0.05853464975953102,
"signal/advantage_pre_scale_abs_mean": 0.05853464975953102,
"signal/advantage_pre_scale_std": 0.10676633566617966,
"signal/advantage_std": 0.10676633566617966,
"signal/brier_reward/centered_abs_mean": 0.12741532027721406,
"signal/brier_reward/group_std_mean": 0.16260745525360107,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015926915034651757,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015926915034651757,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02163679599761963,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027704115584492685,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027045994997024537,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027045994997024537,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003789227642118931,
"signal/frontier_aurc_reward/group_std_mean": 0.006368549633771181,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.782717391615734e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.782717391615734e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14675131738185881,
"signal/frontier_coverage_1/group_std_mean": 0.19162927567958832,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026268486864864824,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026268486864864824,
"signal/frontier_coverage_10/centered_abs_mean": 0.14675131738185881,
"signal/frontier_coverage_10/group_std_mean": 0.19162927567958832,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026268486864864824,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026268486864864824,
"signal/frontier_coverage_15/centered_abs_mean": 0.14675131738185881,
"signal/frontier_coverage_15/group_std_mean": 0.19162927567958832,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026268486864864824,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026268486864864824,
"signal/frontier_coverage_20/centered_abs_mean": 0.14089352786540985,
"signal/frontier_coverage_20/group_std_mean": 0.18426248133182527,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002521994011476636,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002521994011476636,
"signal/frontier_coverage_25/centered_abs_mean": 0.0880542129278183,
"signal/frontier_coverage_25/group_std_mean": 0.11609538346529007,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015761703718453646,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015761703718453646,
"signal/frontier_coverage_5/centered_abs_mean": 0.14675131738185881,
"signal/frontier_coverage_5/group_std_mean": 0.19162927567958832,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026268486864864824,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026268486864864824,
"signal/frontier_ece_reward/centered_abs_mean": 0.008318292908370495,
"signal/frontier_ece_reward/group_std_mean": 0.010465490072965622,
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010397866135463119,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010397866135463119,
"step": 205
},
{
"calibration/aurc": 0.31886242468045917,
"calibration/batch_distribution_entropy": 0.8984277636909574,
"calibration/buffer_distribution_entropy": 0.9441781932677561,
"calibration/confidence_entropy": 0.39574065662655594,
"calibration/coverage@0%": 0.005078125,
"calibration/coverage@1%": 0.005078125,
"calibration/coverage@10%": 0.078125,
"calibration/coverage@15%": 0.101171875,
"calibration/coverage@20%": 0.232421875,
"calibration/coverage@25%": 0.284765625,
"calibration/coverage@30%": 0.398828125,
"calibration/coverage@5%": 0.043359375,
"calibration/ece": 0.17915248377084506,
"calibration/mean_confidence": 0.5848963897512844,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 416.4,
"completions/max_terminated_length": 416.4,
"completions/mean_length": 169.32275390625,
"completions/mean_terminated_length": 169.32275390625,
"completions/min_length": 81.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.672,
"grad_norm": 0.0012568546226248145,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 704778939.0,
"reward": 1.0182757139205934,
"reward_std": 0.07293715327978134,
"rewards/accuracy_reward": 0.56669921875,
"rewards/brier_reward": 0.8031091213226318,
"rewards/confidence_uniqueness_reward": 0.9437248229980468,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0037748562172055244,
"rewards/frontier_coverage_1": 0.15582288652658463,
"rewards/frontier_coverage_10": 0.15582288652658463,
"rewards/frontier_coverage_15": 0.15582288652658463,
"rewards/frontier_coverage_20": 0.15092057287693023,
"rewards/frontier_coverage_25": 0.10167192667722702,
"rewards/frontier_coverage_5": 0.15582288652658463,
"rewards/frontier_ece_reward": 0.007688873633742333,
"signal/accuracy_reward/centered_abs_mean": 0.095550537109375,
"signal/accuracy_reward/group_std_mean": 0.12681576907634734,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477752685546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0477752685546875,
"signal/advantage_abs_mean": 0.055553416907787326,
"signal/advantage_pre_scale_abs_mean": 0.055553416907787326,
"signal/advantage_pre_scale_std": 0.10308739989995956,
"signal/advantage_std": 0.10308739989995956,
"signal/brier_reward/centered_abs_mean": 0.12587104141712188,
"signal/brier_reward/group_std_mean": 0.16116216480731965,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015733880177140235,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015733880177140235,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027187180519104005,
"signal/confidence_uniqueness_reward/group_std_mean": 0.034485659748315814,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033983975648880006,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033983975648880006,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003499569371342659,
"signal/frontier_aurc_reward/group_std_mean": 0.005579089093953371,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.264229159569368e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.264229159569368e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15768970251083375,
"signal/frontier_coverage_1/group_std_mean": 0.20257034599781037,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028226455673575402,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028226455673575402,
"signal/frontier_coverage_10/centered_abs_mean": 0.15768970251083375,
"signal/frontier_coverage_10/group_std_mean": 0.20257034599781037,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028226455673575402,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028226455673575402,
"signal/frontier_coverage_15/centered_abs_mean": 0.15768970251083375,
"signal/frontier_coverage_15/group_std_mean": 0.20257034599781037,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028226455673575402,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028226455673575402,
"signal/frontier_coverage_20/centered_abs_mean": 0.15148624479770662,
"signal/frontier_coverage_20/group_std_mean": 0.19488502740859986,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002711603697389364,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002711603697389364,
"signal/frontier_coverage_25/centered_abs_mean": 0.09543160498142242,
"signal/frontier_coverage_25/group_std_mean": 0.12344460785388947,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017082256963476539,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017082256963476539,
"signal/frontier_coverage_5/centered_abs_mean": 0.15768970251083375,
"signal/frontier_coverage_5/group_std_mean": 0.20257034599781037,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028226455673575402,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028226455673575402,
"signal/frontier_ece_reward/centered_abs_mean": 0.008075537905097008,
"signal/frontier_ece_reward/group_std_mean": 0.01002963688224554,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001009442238137126,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001009442238137126,
"step": 210
},
{
"calibration/aurc": 0.3313409360366979,
"calibration/batch_distribution_entropy": 0.9063225326335698,
"calibration/buffer_distribution_entropy": 0.9449494763071751,
"calibration/confidence_entropy": 0.41968592190031984,
"calibration/coverage@0%": 0.02265625,
"calibration/coverage@1%": 0.02265625,
"calibration/coverage@10%": 0.118359375,
"calibration/coverage@15%": 0.215234375,
"calibration/coverage@20%": 0.332421875,
"calibration/coverage@25%": 0.41796875,
"calibration/coverage@30%": 0.469140625,
"calibration/coverage@5%": 0.06953125,
"calibration/ece": 0.142804706148896,
"calibration/mean_confidence": 0.5511033871465738,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 441.2,
"completions/max_terminated_length": 441.2,
"completions/mean_length": 170.13212890625,
"completions/mean_terminated_length": 170.13212890625,
"completions/min_length": 85.6,
"completions/min_terminated_length": 85.6,
"epoch": 0.688,
"grad_norm": 0.0009384833392687142,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 721475012.0,
"reward": 1.040995454788208,
"reward_std": 0.06858121380209922,
"rewards/accuracy_reward": 0.6099609375,
"rewards/brier_reward": 0.8156073451042175,
"rewards/confidence_uniqueness_reward": 0.9473793029785156,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002881459705531597,
"rewards/frontier_coverage_1": 0.14842391461133958,
"rewards/frontier_coverage_10": 0.14842391461133958,
"rewards/frontier_coverage_15": 0.14842391461133958,
"rewards/frontier_coverage_20": 0.1398467630147934,
"rewards/frontier_coverage_25": 0.09119481742382049,
"rewards/frontier_coverage_5": 0.14842391461133958,
"rewards/frontier_ece_reward": 0.007443835772573948,
"signal/accuracy_reward/centered_abs_mean": 0.090283203125,
"signal/accuracy_reward/group_std_mean": 0.12654573768377303,
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451416015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451416015625,
"signal/advantage_abs_mean": 0.04979099705815315,
"signal/advantage_pre_scale_abs_mean": 0.04979099705815315,
"signal/advantage_pre_scale_std": 0.09680136144161225,
"signal/advantage_std": 0.09680136144161225,
"signal/brier_reward/centered_abs_mean": 0.1112976461648941,
"signal/brier_reward/group_std_mean": 0.14523135423660277,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013912205770611762,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013912205770611762,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024501824378967287,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03078327625989914,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003062728047370911,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003062728047370911,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023349984083324673,
"signal/frontier_aurc_reward/group_std_mean": 0.0037766343681141733,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1796470031840724e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1796470031840724e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15063327848911284,
"signal/frontier_coverage_1/group_std_mean": 0.19600337147712707,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026963357347995045,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026963357347995045,
"signal/frontier_coverage_10/centered_abs_mean": 0.15063327848911284,
"signal/frontier_coverage_10/group_std_mean": 0.19600337147712707,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026963357347995045,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026963357347995045,
"signal/frontier_coverage_15/centered_abs_mean": 0.15063327848911284,
"signal/frontier_coverage_15/group_std_mean": 0.19600337147712707,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026963357347995045,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026963357347995045,
"signal/frontier_coverage_20/centered_abs_mean": 0.14134447574615477,
"signal/frontier_coverage_20/group_std_mean": 0.18422182500362397,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002530066017061472,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002530066017061472,
"signal/frontier_coverage_25/centered_abs_mean": 0.08790470957756043,
"signal/frontier_coverage_25/group_std_mean": 0.11471493542194366,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015734942629933358,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015734942629933358,
"signal/frontier_coverage_5/centered_abs_mean": 0.15063327848911284,
"signal/frontier_coverage_5/group_std_mean": 0.19600337147712707,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026963357347995045,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026963357347995045,
"signal/frontier_ece_reward/centered_abs_mean": 0.007152719609439373,
"signal/frontier_ece_reward/group_std_mean": 0.009025894477963448,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008940899511799216,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008940899511799216,
"step": 215
},
{
"calibration/aurc": 0.27627447367657487,
"calibration/batch_distribution_entropy": 0.8754722884416527,
"calibration/buffer_distribution_entropy": 0.9454094123206801,
"calibration/confidence_entropy": 0.39629469442123594,
"calibration/coverage@0%": 0.00625,
"calibration/coverage@1%": 0.00625,
"calibration/coverage@10%": 0.079296875,
"calibration/coverage@15%": 0.17578125,
"calibration/coverage@20%": 0.2421875,
"calibration/coverage@25%": 0.4421875,
"calibration/coverage@30%": 0.6328125,
"calibration/coverage@5%": 0.0203125,
"calibration/ece": 0.11530746035844426,
"calibration/mean_confidence": 0.5567940371824305,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 395.0,
"completions/max_terminated_length": 395.0,
"completions/mean_length": 170.17822265625,
"completions/mean_terminated_length": 170.17822265625,
"completions/min_length": 81.4,
"completions/min_terminated_length": 81.4,
"epoch": 0.704,
"grad_norm": 0.0008490359177812934,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 738083781.0,
"reward": 1.036322784423828,
"reward_std": 0.06404575407505035,
"rewards/accuracy_reward": 0.59794921875,
"rewards/brier_reward": 0.8232455253601074,
"rewards/confidence_uniqueness_reward": 0.9483070373535156,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0026525924913585186,
"rewards/frontier_coverage_1": 0.15115799009799957,
"rewards/frontier_coverage_10": 0.15115799009799957,
"rewards/frontier_coverage_15": 0.15115799009799957,
"rewards/frontier_coverage_20": 0.14155119955539702,
"rewards/frontier_coverage_25": 0.09430107474327087,
"rewards/frontier_coverage_5": 0.15115799009799957,
"rewards/frontier_ece_reward": 0.007255460135638714,
"signal/accuracy_reward/centered_abs_mean": 0.079205322265625,
"signal/accuracy_reward/group_std_mean": 0.10510388016700745,
"signal/accuracy_reward/group_zero_std_frac": 0.7,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0396026611328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0396026611328125,
"signal/advantage_abs_mean": 0.04904469549655914,
"signal/advantage_pre_scale_abs_mean": 0.04904469549655914,
"signal/advantage_pre_scale_std": 0.09390701353549957,
"signal/advantage_std": 0.09390701353549957,
"signal/brier_reward/centered_abs_mean": 0.11579828858375549,
"signal/brier_reward/group_std_mean": 0.1494680106639862,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014474786072969436,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014474786072969436,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023987340927124023,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03076120503246784,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002998417615890503,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002998417615890503,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020967354532331227,
"signal/frontier_aurc_reward/group_std_mean": 0.0033214128809049724,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.753156343009323e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.753156343009323e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14697804301977158,
"signal/frontier_coverage_1/group_std_mean": 0.19314327836036682,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002630906878039241,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002630906878039241,
"signal/frontier_coverage_10/centered_abs_mean": 0.14697804301977158,
"signal/frontier_coverage_10/group_std_mean": 0.19314327836036682,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002630906878039241,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002630906878039241,
"signal/frontier_coverage_15/centered_abs_mean": 0.14697804301977158,
"signal/frontier_coverage_15/group_std_mean": 0.19314327836036682,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002630906878039241,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002630906878039241,
"signal/frontier_coverage_20/centered_abs_mean": 0.1339241683483124,
"signal/frontier_coverage_20/group_std_mean": 0.17631075382232667,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002397242630831897,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002397242630831897,
"signal/frontier_coverage_25/centered_abs_mean": 0.0837186723947525,
"signal/frontier_coverage_25/group_std_mean": 0.11048696041107178,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014985641930252314,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014985641930252314,
"signal/frontier_coverage_5/centered_abs_mean": 0.14697804301977158,
"signal/frontier_coverage_5/group_std_mean": 0.19314327836036682,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002630906878039241,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002630906878039241,
"signal/frontier_ece_reward/centered_abs_mean": 0.006838279590010643,
"signal/frontier_ece_reward/group_std_mean": 0.008609758876264095,
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008547849487513304,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008547849487513304,
"step": 220
},
{
"calibration/aurc": 0.24601851264573998,
"calibration/batch_distribution_entropy": 0.895164252233965,
"calibration/buffer_distribution_entropy": 0.9445043289902813,
"calibration/confidence_entropy": 0.39696186060770017,
"calibration/coverage@0%": 0.008984375,
"calibration/coverage@1%": 0.008984375,
"calibration/coverage@10%": 0.200390625,
"calibration/coverage@15%": 0.3,
"calibration/coverage@20%": 0.50625,
"calibration/coverage@25%": 0.580078125,
"calibration/coverage@30%": 0.65234375,
"calibration/coverage@5%": 0.077734375,
"calibration/ece": 0.13094241397917852,
"calibration/mean_confidence": 0.5467713178315746,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 625.0,
"completions/max_terminated_length": 410.8,
"completions/mean_length": 171.49765625,
"completions/mean_terminated_length": 171.36477355957032,
"completions/min_length": 83.8,
"completions/min_terminated_length": 83.8,
"epoch": 0.72,
"grad_norm": 0.001135468017309904,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 754849773.0,
"reward": 1.049725556373596,
"reward_std": 0.06837212964892388,
"rewards/accuracy_reward": 0.62744140625,
"rewards/brier_reward": 0.8200330138206482,
"rewards/confidence_uniqueness_reward": 0.9464787244796753,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.00218997981864959,
"rewards/frontier_coverage_1": 0.14537932425737382,
"rewards/frontier_coverage_10": 0.14537932425737382,
"rewards/frontier_coverage_15": 0.14537932425737382,
"rewards/frontier_coverage_20": 0.1319506511092186,
"rewards/frontier_coverage_25": 0.08901111930608749,
"rewards/frontier_coverage_5": 0.14537932425737382,
"rewards/frontier_ece_reward": 0.0073161104694008825,
"signal/accuracy_reward/centered_abs_mean": 0.088348388671875,
"signal/accuracy_reward/group_std_mean": 0.12076024860143661,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441741943359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0441741943359375,
"signal/advantage_abs_mean": 0.050909781455993654,
"signal/advantage_pre_scale_abs_mean": 0.050909781455993654,
"signal/advantage_pre_scale_std": 0.09785163402557373,
"signal/advantage_std": 0.09785163402557373,
"signal/brier_reward/centered_abs_mean": 0.11528852880001068,
"signal/brier_reward/group_std_mean": 0.14847175776958466,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014411066100001335,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014411066100001335,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024321822077035905,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030850404873490334,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003040227759629488,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003040227759629488,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016824970487505197,
"signal/frontier_aurc_reward/group_std_mean": 0.002684881491586566,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0116694688331336e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0116694688331336e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14414913952350616,
"signal/frontier_coverage_1/group_std_mean": 0.19248581528663636,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002580269519239664,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002580269519239664,
"signal/frontier_coverage_10/centered_abs_mean": 0.14414913952350616,
"signal/frontier_coverage_10/group_std_mean": 0.19248581528663636,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002580269519239664,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002580269519239664,
"signal/frontier_coverage_15/centered_abs_mean": 0.14414913952350616,
"signal/frontier_coverage_15/group_std_mean": 0.19248581528663636,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002580269519239664,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002580269519239664,
"signal/frontier_coverage_20/centered_abs_mean": 0.12617649585008622,
"signal/frontier_coverage_20/group_std_mean": 0.16896833181381227,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002258559106849134,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002258559106849134,
"signal/frontier_coverage_25/centered_abs_mean": 0.0776784896850586,
"signal/frontier_coverage_25/group_std_mean": 0.10378318727016449,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013904449297115207,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013904449297115207,
"signal/frontier_coverage_5/centered_abs_mean": 0.14414913952350616,
"signal/frontier_coverage_5/group_std_mean": 0.19248581528663636,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002580269519239664,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002580269519239664,
"signal/frontier_ece_reward/centered_abs_mean": 0.006297392770648003,
"signal/frontier_ece_reward/group_std_mean": 0.007990476116538048,
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007871740963310003,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007871740963310003,
"step": 225
},
{
"calibration/aurc": 0.26690781977820344,
"calibration/batch_distribution_entropy": 0.891574067576345,
"calibration/buffer_distribution_entropy": 0.944466150426838,
"calibration/confidence_entropy": 0.3951382923036295,
"calibration/coverage@0%": 0.00703125,
"calibration/coverage@1%": 0.00703125,
"calibration/coverage@10%": 0.106640625,
"calibration/coverage@15%": 0.30625,
"calibration/coverage@20%": 0.421484375,
"calibration/coverage@25%": 0.482421875,
"calibration/coverage@30%": 0.601171875,
"calibration/coverage@5%": 0.058203125,
"calibration/ece": 0.16398913550545838,
"calibration/mean_confidence": 0.5757356746185531,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 419.4,
"completions/max_terminated_length": 419.4,
"completions/mean_length": 169.4328125,
"completions/mean_terminated_length": 169.4328125,
"completions/min_length": 85.2,
"completions/min_terminated_length": 85.2,
"epoch": 0.736,
"grad_norm": 0.0008367888513021171,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 771524349.0,
"reward": 1.0533449411392213,
"reward_std": 0.06190124675631523,
"rewards/accuracy_reward": 0.63642578125,
"rewards/brier_reward": 0.8217647790908813,
"rewards/confidence_uniqueness_reward": 0.94322509765625,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0025330462027341127,
"rewards/frontier_coverage_1": 0.13909566402435303,
"rewards/frontier_coverage_10": 0.13909566402435303,
"rewards/frontier_coverage_15": 0.13909566402435303,
"rewards/frontier_coverage_20": 0.12320514023303986,
"rewards/frontier_coverage_25": 0.08486966341733933,
"rewards/frontier_coverage_5": 0.13909566402435303,
"rewards/frontier_ece_reward": 0.006958847213536501,
"signal/accuracy_reward/centered_abs_mean": 0.078021240234375,
"signal/accuracy_reward/group_std_mean": 0.10577622652053834,
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0390106201171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0390106201171875,
"signal/advantage_abs_mean": 0.04666791334748268,
"signal/advantage_pre_scale_abs_mean": 0.04666791334748268,
"signal/advantage_pre_scale_std": 0.08996414840221405,
"signal/advantage_std": 0.08996414840221405,
"signal/brier_reward/centered_abs_mean": 0.11445859372615814,
"signal/brier_reward/group_std_mean": 0.15056610703468323,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014307324215769768,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014307324215769768,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027132463455200196,
"signal/confidence_uniqueness_reward/group_std_mean": 0.034345941990613936,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033915579319000245,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033915579319000245,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018936245003715157,
"signal/frontier_aurc_reward/group_std_mean": 0.002985938685014844,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.389587946003303e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.389587946003303e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14948874711990356,
"signal/frontier_coverage_1/group_std_mean": 0.1956830859184265,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002675848500803113,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002675848500803113,
"signal/frontier_coverage_10/centered_abs_mean": 0.14948874711990356,
"signal/frontier_coverage_10/group_std_mean": 0.1956830859184265,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002675848500803113,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002675848500803113,
"signal/frontier_coverage_15/centered_abs_mean": 0.14948874711990356,
"signal/frontier_coverage_15/group_std_mean": 0.1956830859184265,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002675848500803113,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002675848500803113,
"signal/frontier_coverage_20/centered_abs_mean": 0.12488599568605423,
"signal/frontier_coverage_20/group_std_mean": 0.1639753460884094,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022354592569172383,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022354592569172383,
"signal/frontier_coverage_25/centered_abs_mean": 0.07906675487756729,
"signal/frontier_coverage_25/group_std_mean": 0.10347287952899933,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014152948977425694,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014152948977425694,
"signal/frontier_coverage_5/centered_abs_mean": 0.14948874711990356,
"signal/frontier_coverage_5/group_std_mean": 0.1956830859184265,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002675848500803113,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002675848500803113,
"signal/frontier_ece_reward/centered_abs_mean": 0.006176774390041828,
"signal/frontier_ece_reward/group_std_mean": 0.007886088266968728,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007720967987552285,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007720967987552285,
"step": 230
},
{
"calibration/aurc": 0.2508649857745767,
"calibration/batch_distribution_entropy": 0.8772631690994259,
"calibration/buffer_distribution_entropy": 0.9435599844658459,
"calibration/confidence_entropy": 0.38182445660178704,
"calibration/coverage@0%": 0.005079653864970645,
"calibration/coverage@1%": 0.005079653864970645,
"calibration/coverage@10%": 0.19609527886497063,
"calibration/coverage@15%": 0.3114305589530333,
"calibration/coverage@20%": 0.45413175758317026,
"calibration/coverage@25%": 0.5537885273972603,
"calibration/coverage@30%": 0.6596868884540117,
"calibration/coverage@5%": 0.08867340386497065,
"calibration/ece": 0.11011742914387986,
"calibration/mean_confidence": 0.5086671170485625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 638.4,
"completions/max_terminated_length": 399.0,
"completions/mean_length": 168.17314453125,
"completions/mean_terminated_length": 168.03954467773437,
"completions/min_length": 82.8,
"completions/min_terminated_length": 82.8,
"epoch": 0.752,
"grad_norm": 0.0008424303960055113,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 788473642.0,
"reward": 1.0425845623016357,
"reward_std": 0.06250675097107887,
"rewards/accuracy_reward": 0.6162109375,
"rewards/brier_reward": 0.8180248498916626,
"rewards/confidence_uniqueness_reward": 0.9436966180801392,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0028093053959310057,
"rewards/frontier_coverage_1": 0.13971500843763351,
"rewards/frontier_coverage_10": 0.13971500843763351,
"rewards/frontier_coverage_15": 0.13971500843763351,
"rewards/frontier_coverage_20": 0.11759312674403191,
"rewards/frontier_coverage_25": 0.08073695451021194,
"rewards/frontier_coverage_5": 0.13971500843763351,
"rewards/frontier_ece_reward": 0.006474507041275501,
"signal/accuracy_reward/centered_abs_mean": 0.076611328125,
"signal/accuracy_reward/group_std_mean": 0.1029132753610611,
"signal/accuracy_reward/group_zero_std_frac": 0.7,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0383056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0383056640625,
"signal/advantage_abs_mean": 0.04646777287125588,
"signal/advantage_pre_scale_abs_mean": 0.04646777287125588,
"signal/advantage_pre_scale_std": 0.09245792478322983,
"signal/advantage_std": 0.09245792478322983,
"signal/brier_reward/centered_abs_mean": 0.10695935487747192,
"signal/brier_reward/group_std_mean": 0.14035816490650177,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01336991935968399,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01336991935968399,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0261497862637043,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033126043528318404,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032687232829630374,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032687232829630374,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002035298151895404,
"signal/frontier_aurc_reward/group_std_mean": 0.0031914392486214636,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.643183481472079e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.643183481472079e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13366345167160035,
"signal/frontier_coverage_1/group_std_mean": 0.17809367179870605,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023925757966935636,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023925757966935636,
"signal/frontier_coverage_10/centered_abs_mean": 0.13366345167160035,
"signal/frontier_coverage_10/group_std_mean": 0.17809367179870605,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023925757966935636,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023925757966935636,
"signal/frontier_coverage_15/centered_abs_mean": 0.13366345167160035,
"signal/frontier_coverage_15/group_std_mean": 0.17809367179870605,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023925757966935636,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023925757966935636,
"signal/frontier_coverage_20/centered_abs_mean": 0.11026991456747055,
"signal/frontier_coverage_20/group_std_mean": 0.14772895574569703,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001973831397481263,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001973831397481263,
"signal/frontier_coverage_25/centered_abs_mean": 0.06904419511556625,
"signal/frontier_coverage_25/group_std_mean": 0.09221114963293076,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012358910171315074,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012358910171315074,
"signal/frontier_coverage_5/centered_abs_mean": 0.13366345167160035,
"signal/frontier_coverage_5/group_std_mean": 0.17809367179870605,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023925757966935636,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023925757966935636,
"signal/frontier_ece_reward/centered_abs_mean": 0.005866312328726054,
"signal/frontier_ece_reward/group_std_mean": 0.007535163220018149,
"signal/frontier_ece_reward/group_zero_std_frac": 0.034375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007332890410907567,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007332890410907567,
"step": 235
},
{
"calibration/aurc": 0.2692653845277622,
"calibration/batch_distribution_entropy": 0.9305069965374951,
"calibration/buffer_distribution_entropy": 0.9422891039724725,
"calibration/confidence_entropy": 0.4174535256865025,
"calibration/coverage@0%": 0.04609375,
"calibration/coverage@1%": 0.04609375,
"calibration/coverage@10%": 0.2765625,
"calibration/coverage@15%": 0.321484375,
"calibration/coverage@20%": 0.384375,
"calibration/coverage@25%": 0.4265625,
"calibration/coverage@30%": 0.4875,
"calibration/coverage@5%": 0.20546875,
"calibration/ece": 0.18388124767598565,
"calibration/mean_confidence": 0.5307756600477634,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 408.0,
"completions/max_terminated_length": 408.0,
"completions/mean_length": 175.7646484375,
"completions/mean_terminated_length": 175.7646484375,
"completions/min_length": 87.8,
"completions/min_terminated_length": 87.8,
"epoch": 0.768,
"grad_norm": 0.0009248699061572552,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 805206176.0,
"reward": 1.0395651578903198,
"reward_std": 0.06530485600233078,
"rewards/accuracy_reward": 0.59873046875,
"rewards/brier_reward": 0.8308544993400574,
"rewards/confidence_uniqueness_reward": 0.9474327087402343,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002693426189944148,
"rewards/frontier_coverage_1": 0.17641193866729737,
"rewards/frontier_coverage_10": 0.17641193866729737,
"rewards/frontier_coverage_15": 0.17641193866729737,
"rewards/frontier_coverage_20": 0.1520010009407997,
"rewards/frontier_coverage_25": 0.0975383996963501,
"rewards/frontier_coverage_5": 0.17641193866729737,
"rewards/frontier_ece_reward": 0.006915272772312164,
"signal/accuracy_reward/centered_abs_mean": 0.080853271484375,
"signal/accuracy_reward/group_std_mean": 0.1111733928322792,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0404266357421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0404266357421875,
"signal/advantage_abs_mean": 0.04827382862567901,
"signal/advantage_pre_scale_abs_mean": 0.04827382862567901,
"signal/advantage_pre_scale_std": 0.09414819777011871,
"signal/advantage_std": 0.09414819777011871,
"signal/brier_reward/centered_abs_mean": 0.11026288121938706,
"signal/brier_reward/group_std_mean": 0.14330510795116425,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013782860152423382,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013782860152423382,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02446780204772949,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0311261810362339,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030584752559661863,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030584752559661863,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019394330214709044,
"signal/frontier_aurc_reward/group_std_mean": 0.0028698711190372705,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.471585005172528e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.471585005172528e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14197806715965272,
"signal/frontier_coverage_1/group_std_mean": 0.1858145385980606,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002541407197713852,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002541407197713852,
"signal/frontier_coverage_10/centered_abs_mean": 0.14197806715965272,
"signal/frontier_coverage_10/group_std_mean": 0.1858145385980606,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002541407197713852,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002541407197713852,
"signal/frontier_coverage_15/centered_abs_mean": 0.14197806715965272,
"signal/frontier_coverage_15/group_std_mean": 0.1858145385980606,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002541407197713852,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002541407197713852,
"signal/frontier_coverage_20/centered_abs_mean": 0.1180332601070404,
"signal/frontier_coverage_20/group_std_mean": 0.15438797175884247,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002112795226275921,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002112795226275921,
"signal/frontier_coverage_25/centered_abs_mean": 0.0736841842532158,
"signal/frontier_coverage_25/group_std_mean": 0.0960970863699913,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013189468532800674,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013189468532800674,
"signal/frontier_coverage_5/centered_abs_mean": 0.14197806715965272,
"signal/frontier_coverage_5/group_std_mean": 0.1858145385980606,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002541407197713852,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002541407197713852,
"signal/frontier_ece_reward/centered_abs_mean": 0.005718752928078175,
"signal/frontier_ece_reward/group_std_mean": 0.007219527196139097,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007148441160097718,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007148441160097718,
"step": 240
},
{
"calibration/aurc": 0.33616682511869883,
"calibration/batch_distribution_entropy": 0.8847108969382628,
"calibration/buffer_distribution_entropy": 0.9401263071041226,
"calibration/confidence_entropy": 0.39040401656386514,
"calibration/coverage@0%": 0.019921875,
"calibration/coverage@1%": 0.019921875,
"calibration/coverage@10%": 0.1421875,
"calibration/coverage@15%": 0.270703125,
"calibration/coverage@20%": 0.309375,
"calibration/coverage@25%": 0.351953125,
"calibration/coverage@30%": 0.399609375,
"calibration/coverage@5%": 0.086328125,
"calibration/ece": 0.18135499849271383,
"calibration/mean_confidence": 0.5841278451007786,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 431.4,
"completions/max_terminated_length": 431.4,
"completions/mean_length": 179.7685546875,
"completions/mean_terminated_length": 179.7685546875,
"completions/min_length": 91.8,
"completions/min_terminated_length": 91.8,
"epoch": 0.784,
"grad_norm": 0.0009112543775700033,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 822221374.0,
"reward": 1.0436746239662171,
"reward_std": 0.06336919069290162,
"rewards/accuracy_reward": 0.62392578125,
"rewards/brier_reward": 0.8084685683250428,
"rewards/confidence_uniqueness_reward": 0.9493904113769531,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0032422452699393035,
"rewards/frontier_coverage_1": 0.11677031964063644,
"rewards/frontier_coverage_10": 0.11677031964063644,
"rewards/frontier_coverage_15": 0.11677031964063644,
"rewards/frontier_coverage_20": 0.0989714041352272,
"rewards/frontier_coverage_25": 0.06778252124786377,
"rewards/frontier_coverage_5": 0.11677031964063644,
"rewards/frontier_ece_reward": 0.0055341293103992936,
"signal/accuracy_reward/centered_abs_mean": 0.076141357421875,
"signal/accuracy_reward/group_std_mean": 0.10507323890924454,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0380706787109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0380706787109375,
"signal/advantage_abs_mean": 0.04689319357275963,
"signal/advantage_pre_scale_abs_mean": 0.04689319357275963,
"signal/advantage_pre_scale_std": 0.09247228652238845,
"signal/advantage_std": 0.09247228652238845,
"signal/brier_reward/centered_abs_mean": 0.10638263672590256,
"signal/brier_reward/group_std_mean": 0.13932308405637742,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01329782959073782,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01329782959073782,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024085187911987306,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030663982033729553,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030106484889984133,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030106484889984133,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024794211611151697,
"signal/frontier_aurc_reward/group_std_mean": 0.0039051207713782787,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.438163814484142e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.438163814484142e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.130580872297287,
"signal/frontier_coverage_1/group_std_mean": 0.17106756269931794,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002337397518567741,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002337397518567741,
"signal/frontier_coverage_10/centered_abs_mean": 0.130580872297287,
"signal/frontier_coverage_10/group_std_mean": 0.17106756269931794,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002337397518567741,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002337397518567741,
"signal/frontier_coverage_15/centered_abs_mean": 0.130580872297287,
"signal/frontier_coverage_15/group_std_mean": 0.17106756269931794,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002337397518567741,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002337397518567741,
"signal/frontier_coverage_20/centered_abs_mean": 0.10095408111810684,
"signal/frontier_coverage_20/group_std_mean": 0.13289882242679596,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018070780904963612,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018070780904963612,
"signal/frontier_coverage_25/centered_abs_mean": 0.06456505954265594,
"signal/frontier_coverage_25/group_std_mean": 0.08433714807033539,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011557145044207573,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011557145044207573,
"signal/frontier_coverage_5/centered_abs_mean": 0.130580872297287,
"signal/frontier_coverage_5/group_std_mean": 0.17106756269931794,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002337397518567741,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002337397518567741,
"signal/frontier_ece_reward/centered_abs_mean": 0.005532194208353758,
"signal/frontier_ece_reward/group_std_mean": 0.006969755701720715,
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006915242760442197,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006915242760442197,
"step": 245
},
{
"calibration/aurc": 0.2185446445677223,
"calibration/batch_distribution_entropy": 0.9204417599936402,
"calibration/buffer_distribution_entropy": 0.9386189563456048,
"calibration/confidence_entropy": 0.41792748593906587,
"calibration/coverage@0%": 0.01796875,
"calibration/coverage@1%": 0.01796875,
"calibration/coverage@10%": 0.17154629403131114,
"calibration/coverage@15%": 0.4205425941780822,
"calibration/coverage@20%": 0.5659124266144814,
"calibration/coverage@25%": 0.645634326076321,
"calibration/coverage@30%": 0.7261267734833659,
"calibration/coverage@5%": 0.080078125,
"calibration/ece": 0.09575703518133583,
"calibration/mean_confidence": 0.5476521104531964,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 858.6,
"completions/max_terminated_length": 449.0,
"completions/mean_length": 184.08486328125,
"completions/mean_terminated_length": 183.82038269042968,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.8,
"grad_norm": 0.0015918654389679432,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 839116963.0,
"reward": 1.063495922088623,
"reward_std": 0.065474983304739,
"rewards/accuracy_reward": 0.65166015625,
"rewards/brier_reward": 0.8490127921104431,
"rewards/confidence_uniqueness_reward": 0.9503978848457336,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0026999707799404858,
"rewards/frontier_coverage_1": 0.12567180246114731,
"rewards/frontier_coverage_10": 0.12567180246114731,
"rewards/frontier_coverage_15": 0.12567180246114731,
"rewards/frontier_coverage_20": 0.10079433023929596,
"rewards/frontier_coverage_25": 0.07056059390306473,
"rewards/frontier_coverage_5": 0.12567180246114731,
"rewards/frontier_ece_reward": 0.0065612408332526686,
"signal/accuracy_reward/centered_abs_mean": 0.084564208984375,
"signal/accuracy_reward/group_std_mean": 0.11188896298408509,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422821044921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422821044921875,
"signal/advantage_abs_mean": 0.049622184783220294,
"signal/advantage_pre_scale_abs_mean": 0.049622184783220294,
"signal/advantage_pre_scale_std": 0.09784637689590454,
"signal/advantage_std": 0.09784637689590454,
"signal/brier_reward/centered_abs_mean": 0.09944085478782654,
"signal/brier_reward/group_std_mean": 0.12967925816774367,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012430106848478317,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012430106848478317,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023244918510317802,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03046169951558113,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029056148137897252,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029056148137897252,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001967226341366768,
"signal/frontier_aurc_reward/group_std_mean": 0.0029548021499067544,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.521335020195693e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.521335020195693e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12609002143144607,
"signal/frontier_coverage_1/group_std_mean": 0.1646766871213913,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002257011365145445,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002257011365145445,
"signal/frontier_coverage_10/centered_abs_mean": 0.12609002143144607,
"signal/frontier_coverage_10/group_std_mean": 0.1646766871213913,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002257011365145445,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002257011365145445,
"signal/frontier_coverage_15/centered_abs_mean": 0.12609002143144607,
"signal/frontier_coverage_15/group_std_mean": 0.1646766871213913,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002257011365145445,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002257011365145445,
"signal/frontier_coverage_20/centered_abs_mean": 0.09504708796739578,
"signal/frontier_coverage_20/group_std_mean": 0.1250511020421982,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017013428499922157,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017013428499922157,
"signal/frontier_coverage_25/centered_abs_mean": 0.060256894677877426,
"signal/frontier_coverage_25/group_std_mean": 0.0785814642906189,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010785983293317258,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010785983293317258,
"signal/frontier_coverage_5/centered_abs_mean": 0.12609002143144607,
"signal/frontier_coverage_5/group_std_mean": 0.1646766871213913,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002257011365145445,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002257011365145445,
"signal/frontier_ece_reward/centered_abs_mean": 0.005481574684381485,
"signal/frontier_ece_reward/group_std_mean": 0.006954910047352314,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006851968355476856,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006851968355476856,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.5091041142943041,
"eval_calibration/batch_distribution_entropy": 0.8943448287266812,
"eval_calibration/buffer_distribution_entropy": 0.9384914438088658,
"eval_calibration/confidence_entropy": 0.424537314501817,
"eval_calibration/coverage@0%": 0.0390625,
"eval_calibration/coverage@1%": 0.0390625,
"eval_calibration/coverage@10%": 0.0390625,
"eval_calibration/coverage@15%": 0.0625,
"eval_calibration/coverage@20%": 0.1015625,
"eval_calibration/coverage@25%": 0.1328125,
"eval_calibration/coverage@30%": 0.2109375,
"eval_calibration/coverage@5%": 0.0390625,
"eval_calibration/ece": 0.24470620265151566,
"eval_calibration/mean_confidence": 0.5217374526515156,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 342.0,
"eval_completions/max_terminated_length": 342.0,
"eval_completions/mean_length": 190.5261993408203,
"eval_completions/mean_terminated_length": 190.5261993408203,
"eval_completions/min_length": 110.0,
"eval_completions/min_terminated_length": 110.0,
"eval_loss": 0.0,
"eval_num_tokens": 839116963.0,
"eval_reward": 0.944745734333992,
"eval_reward_std": 0.234944935888052,
"eval_rewards/accuracy_reward": 0.427734375,
"eval_rewards/brier_reward": 0.784925252199173,
"eval_rewards/confidence_uniqueness_reward": 0.899658203125,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0035588888567872345,
"eval_rewards/frontier_coverage_1": 0.21038169413805008,
"eval_rewards/frontier_coverage_10": 0.21038169413805008,
"eval_rewards/frontier_coverage_15": 0.21038169413805008,
"eval_rewards/frontier_coverage_20": 0.16055180132389069,
"eval_rewards/frontier_coverage_25": 0.0976751372218132,
"eval_rewards/frontier_coverage_5": 0.21038169413805008,
"eval_rewards/frontier_ece_reward": 0.005469902069307864,
"eval_runtime": 18.6142,
"eval_samples_per_second": 26.861,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4703369140625,
"eval_signal/accuracy_reward/group_std_mean": 0.49209941923618317,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23516845703125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23516845703125,
"eval_signal/advantage_abs_mean": 0.21568895503878593,
"eval_signal/advantage_pre_scale_abs_mean": 0.21568895503878593,
"eval_signal/advantage_pre_scale_std": 0.23243148252367973,
"eval_signal/advantage_std": 0.23243148252367973,
"eval_signal/brier_reward/centered_abs_mean": 0.2361176684498787,
"eval_signal/brier_reward/group_std_mean": 0.28941161930561066,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029514708556234837,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.029514708556234837,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04095458984375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04844135884195566,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00511932373046875,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00511932373046875,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004551700607407838,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007785420399159193,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.147543940140167e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.147543940140167e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3619851619005203,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4468979686498642,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006479534204117954,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006479534204117954,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3619851619005203,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4468979686498642,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006479534204117954,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006479534204117954,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3619851619005203,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4468979686498642,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006479534204117954,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006479534204117954,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2763536870479584,
"eval_signal/frontier_coverage_20/group_std_mean": 0.34432317316532135,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004946730565279722,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004946730565279722,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.15370038896799088,
"eval_signal/frontier_coverage_25/group_std_mean": 0.1981576457619667,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002751236781477928,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002751236781477928,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3619851619005203,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4468979686498642,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006479534204117954,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006479534204117954,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.009484815411269665,
"eval_signal/frontier_ece_reward/group_std_mean": 0.011611438822001219,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001185601926408708,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001185601926408708,
"eval_steps_per_second": 0.215,
"step": 250
},
{
"calibration/aurc": 0.22592958052596118,
"calibration/batch_distribution_entropy": 0.8792158677852591,
"calibration/buffer_distribution_entropy": 0.9373014248801486,
"calibration/confidence_entropy": 0.39230068107730476,
"calibration/coverage@0%": 0.015625,
"calibration/coverage@1%": 0.015625,
"calibration/coverage@10%": 0.183203125,
"calibration/coverage@15%": 0.30625,
"calibration/coverage@20%": 0.41875,
"calibration/coverage@25%": 0.70078125,
"calibration/coverage@30%": 0.804296875,
"calibration/coverage@5%": 0.066796875,
"calibration/ece": 0.11634392264448672,
"calibration/mean_confidence": 0.5905225369121635,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 419.4,
"completions/max_terminated_length": 419.4,
"completions/mean_length": 188.4681640625,
"completions/mean_terminated_length": 188.4681640625,
"completions/min_length": 96.8,
"completions/min_terminated_length": 96.8,
"epoch": 0.816,
"grad_norm": 0.0009058048599399626,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 856146045.0,
"reward": 1.0458628177642821,
"reward_std": 0.06390021666884423,
"rewards/accuracy_reward": 0.631640625,
"rewards/brier_reward": 0.8064838409423828,
"rewards/confidence_uniqueness_reward": 0.9514259338378906,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.00283654413651675,
"rewards/frontier_coverage_1": 0.0995137207210064,
"rewards/frontier_coverage_10": 0.0995137207210064,
"rewards/frontier_coverage_15": 0.0995137207210064,
"rewards/frontier_coverage_20": 0.084238101541996,
"rewards/frontier_coverage_25": 0.05991590246558189,
"rewards/frontier_coverage_5": 0.0995137207210064,
"rewards/frontier_ece_reward": 0.00519214584492147,
"signal/accuracy_reward/centered_abs_mean": 0.07982177734375,
"signal/accuracy_reward/group_std_mean": 0.11125250309705734,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039910888671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039910888671875,
"signal/advantage_abs_mean": 0.046833574771881104,
"signal/advantage_pre_scale_abs_mean": 0.046833574771881104,
"signal/advantage_pre_scale_std": 0.09199159741401672,
"signal/advantage_std": 0.09199159741401672,
"signal/brier_reward/centered_abs_mean": 0.11039517223834991,
"signal/brier_reward/group_std_mean": 0.1410120666027069,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013799396529793739,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013799396529793739,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02168407440185547,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027639732882380485,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027105093002319338,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027105093002319338,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00219038394279778,
"signal/frontier_aurc_reward/group_std_mean": 0.0035009294748306274,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.920787130482495e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.920787130482495e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1411260485649109,
"signal/frontier_coverage_1/group_std_mean": 0.18258497714996338,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025261562783271073,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025261562783271073,
"signal/frontier_coverage_10/centered_abs_mean": 0.1411260485649109,
"signal/frontier_coverage_10/group_std_mean": 0.18258497714996338,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025261562783271073,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025261562783271073,
"signal/frontier_coverage_15/centered_abs_mean": 0.1411260485649109,
"signal/frontier_coverage_15/group_std_mean": 0.18258497714996338,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025261562783271073,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025261562783271073,
"signal/frontier_coverage_20/centered_abs_mean": 0.10667684972286225,
"signal/frontier_coverage_20/group_std_mean": 0.13872886300086976,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019095155643299223,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019095155643299223,
"signal/frontier_coverage_25/centered_abs_mean": 0.06732679009437562,
"signal/frontier_coverage_25/group_std_mean": 0.08767161518335342,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012051495257765054,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012051495257765054,
"signal/frontier_coverage_5/centered_abs_mean": 0.1411260485649109,
"signal/frontier_coverage_5/group_std_mean": 0.18258497714996338,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025261562783271073,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025261562783271073,
"signal/frontier_ece_reward/centered_abs_mean": 0.005312436446547508,
"signal/frontier_ece_reward/group_std_mean": 0.006751040741801262,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006640545558184386,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006640545558184386,
"step": 255
},
{
"calibration/aurc": 0.28655826197779344,
"calibration/batch_distribution_entropy": 0.9274159649176383,
"calibration/buffer_distribution_entropy": 0.9362063686127197,
"calibration/confidence_entropy": 0.4270325624699677,
"calibration/coverage@0%": 0.025390625,
"calibration/coverage@1%": 0.025390625,
"calibration/coverage@10%": 0.219921875,
"calibration/coverage@15%": 0.266796875,
"calibration/coverage@20%": 0.330078125,
"calibration/coverage@25%": 0.38671875,
"calibration/coverage@30%": 0.50546875,
"calibration/coverage@5%": 0.190625,
"calibration/ece": 0.12614358852833463,
"calibration/mean_confidence": 0.508274267709433,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 452.2,
"completions/max_terminated_length": 452.2,
"completions/mean_length": 195.576171875,
"completions/mean_terminated_length": 195.576171875,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.832,
"grad_norm": 0.0007307277410291135,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 873157097.0,
"reward": 1.0517502784729005,
"reward_std": 0.0625480704009533,
"rewards/accuracy_reward": 0.625390625,
"rewards/brier_reward": 0.8416979908943176,
"rewards/confidence_uniqueness_reward": 0.9532333374023437,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0020900500006973744,
"rewards/frontier_coverage_1": 0.14690827578306198,
"rewards/frontier_coverage_10": 0.14690827578306198,
"rewards/frontier_coverage_15": 0.14690827578306198,
"rewards/frontier_coverage_20": 0.11412490308284759,
"rewards/frontier_coverage_25": 0.0797498419880867,
"rewards/frontier_coverage_5": 0.14690827578306198,
"rewards/frontier_ece_reward": 0.005895926151424646,
"signal/accuracy_reward/centered_abs_mean": 0.08516845703125,
"signal/accuracy_reward/group_std_mean": 0.11420958936214447,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042584228515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042584228515625,
"signal/advantage_abs_mean": 0.04753379821777344,
"signal/advantage_pre_scale_abs_mean": 0.04753379821777344,
"signal/advantage_pre_scale_std": 0.09404327720403671,
"signal/advantage_std": 0.09404327720403671,
"signal/brier_reward/centered_abs_mean": 0.09486477375030518,
"signal/brier_reward/group_std_mean": 0.1234696313738823,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011858096718788147,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011858096718788147,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0194500207901001,
"signal/confidence_uniqueness_reward/group_std_mean": 0.024607939645648003,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024312525987625123,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024312525987625123,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014977958402596415,
"signal/frontier_aurc_reward/group_std_mean": 0.0023715029004961253,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6810545386979355e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6810545386979355e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1350954830646515,
"signal/frontier_coverage_1/group_std_mean": 0.17487715780735016,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024182090070098638,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024182090070098638,
"signal/frontier_coverage_10/centered_abs_mean": 0.1350954830646515,
"signal/frontier_coverage_10/group_std_mean": 0.17487715780735016,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024182090070098638,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024182090070098638,
"signal/frontier_coverage_15/centered_abs_mean": 0.1350954830646515,
"signal/frontier_coverage_15/group_std_mean": 0.17487715780735016,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024182090070098638,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024182090070098638,
"signal/frontier_coverage_20/centered_abs_mean": 0.1005860447883606,
"signal/frontier_coverage_20/group_std_mean": 0.13043999224901198,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001800490147434175,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001800490147434175,
"signal/frontier_coverage_25/centered_abs_mean": 0.06362877637147904,
"signal/frontier_coverage_25/group_std_mean": 0.0820870503783226,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011389550636522472,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011389550636522472,
"signal/frontier_coverage_5/centered_abs_mean": 0.1350954830646515,
"signal/frontier_coverage_5/group_std_mean": 0.17487715780735016,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024182090070098638,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024182090070098638,
"signal/frontier_ece_reward/centered_abs_mean": 0.004868951346725226,
"signal/frontier_ece_reward/group_std_mean": 0.006263002008199692,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006086189183406532,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006086189183406532,
"step": 260
},
{
"calibration/aurc": 0.3079556619466057,
"calibration/batch_distribution_entropy": 0.9202860468697315,
"calibration/buffer_distribution_entropy": 0.9368981299208997,
"calibration/confidence_entropy": 0.4463013291048109,
"calibration/coverage@0%": 0.04140625,
"calibration/coverage@1%": 0.05234375,
"calibration/coverage@10%": 0.193359375,
"calibration/coverage@15%": 0.3015625,
"calibration/coverage@20%": 0.46484375,
"calibration/coverage@25%": 0.520703125,
"calibration/coverage@30%": 0.5625,
"calibration/coverage@5%": 0.1359375,
"calibration/ece": 0.1549067936068524,
"calibration/mean_confidence": 0.6070357810612892,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 500.2,
"completions/max_terminated_length": 500.2,
"completions/mean_length": 196.15537109375,
"completions/mean_terminated_length": 196.15537109375,
"completions/min_length": 99.2,
"completions/min_terminated_length": 99.2,
"epoch": 0.848,
"grad_norm": 0.0009925616905093193,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 890180096.0,
"reward": 1.0339147329330445,
"reward_std": 0.06378009840846062,
"rewards/accuracy_reward": 0.59375,
"rewards/brier_reward": 0.8303740501403809,
"rewards/confidence_uniqueness_reward": 0.9569587707519531,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002599796885624528,
"rewards/frontier_coverage_1": 0.13877977132797242,
"rewards/frontier_coverage_10": 0.13877977132797242,
"rewards/frontier_coverage_15": 0.13877977132797242,
"rewards/frontier_coverage_20": 0.10104698985815048,
"rewards/frontier_coverage_25": 0.06898890286684037,
"rewards/frontier_coverage_5": 0.13877977132797242,
"rewards/frontier_ece_reward": 0.0055153296329081055,
"signal/accuracy_reward/centered_abs_mean": 0.07659912109375,
"signal/accuracy_reward/group_std_mean": 0.10650120824575424,
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.038299560546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.038299560546875,
"signal/advantage_abs_mean": 0.04687718003988266,
"signal/advantage_pre_scale_abs_mean": 0.04687718003988266,
"signal/advantage_pre_scale_std": 0.09359803646802903,
"signal/advantage_std": 0.09359803646802903,
"signal/brier_reward/centered_abs_mean": 0.09589692950248718,
"signal/brier_reward/group_std_mean": 0.12390242516994476,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011987116187810898,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011987116187810898,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01778378486633301,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0222574207931757,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002222973108291626,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002222973108291626,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019035086035728455,
"signal/frontier_aurc_reward/group_std_mean": 0.0029362429399043322,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.407280346436892e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.407280346436892e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12207435816526413,
"signal/frontier_coverage_1/group_std_mean": 0.16072221398353576,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002185130910947919,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002185130910947919,
"signal/frontier_coverage_10/centered_abs_mean": 0.12207435816526413,
"signal/frontier_coverage_10/group_std_mean": 0.16072221398353576,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002185130910947919,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002185130910947919,
"signal/frontier_coverage_15/centered_abs_mean": 0.12207435816526413,
"signal/frontier_coverage_15/group_std_mean": 0.16072221398353576,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002185130910947919,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002185130910947919,
"signal/frontier_coverage_20/centered_abs_mean": 0.09141052961349487,
"signal/frontier_coverage_20/group_std_mean": 0.12017861008644104,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016362484311684966,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016362484311684966,
"signal/frontier_coverage_25/centered_abs_mean": 0.057944309711456296,
"signal/frontier_coverage_25/group_std_mean": 0.07580447942018509,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010372031247243285,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010372031247243285,
"signal/frontier_coverage_5/centered_abs_mean": 0.12207435816526413,
"signal/frontier_coverage_5/group_std_mean": 0.16072221398353576,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002185130910947919,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002185130910947919,
"signal/frontier_ece_reward/centered_abs_mean": 0.004729109071195126,
"signal/frontier_ece_reward/group_std_mean": 0.006064791046082973,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005911386338993907,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005911386338993907,
"step": 265
},
{
"calibration/aurc": 0.24449910733899616,
"calibration/batch_distribution_entropy": 0.9046358315553906,
"calibration/buffer_distribution_entropy": 0.937494096852156,
"calibration/confidence_entropy": 0.4440393999061635,
"calibration/coverage@0%": 0.016019447162426613,
"calibration/coverage@1%": 0.016019447162426613,
"calibration/coverage@10%": 0.22835127201565558,
"calibration/coverage@15%": 0.29830372431506846,
"calibration/coverage@20%": 0.39177164872798437,
"calibration/coverage@25%": 0.46211472602739717,
"calibration/coverage@30%": 0.6633561643835616,
"calibration/coverage@5%": 0.1157648911448141,
"calibration/ece": 0.14796933480533733,
"calibration/mean_confidence": 0.6401590401083131,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 901.0,
"completions/max_terminated_length": 610.4,
"completions/mean_length": 203.5150390625,
"completions/mean_terminated_length": 203.2554504394531,
"completions/min_length": 93.4,
"completions/min_terminated_length": 93.4,
"epoch": 0.864,
"grad_norm": 0.001151371281594038,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 907250906.0,
"reward": 1.0528210639953612,
"reward_std": 0.06563054919242858,
"rewards/accuracy_reward": 0.63896484375,
"rewards/brier_reward": 0.8275640845298767,
"rewards/confidence_uniqueness_reward": 0.9524169325828552,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.002205755584873259,
"rewards/frontier_coverage_1": 0.10852386504411697,
"rewards/frontier_coverage_10": 0.10852386504411697,
"rewards/frontier_coverage_15": 0.10827866345643997,
"rewards/frontier_coverage_20": 0.08587422221899033,
"rewards/frontier_coverage_25": 0.06290318444371223,
"rewards/frontier_coverage_5": 0.10852386504411697,
"rewards/frontier_ece_reward": 0.0051742102019488815,
"signal/accuracy_reward/centered_abs_mean": 0.087860107421875,
"signal/accuracy_reward/group_std_mean": 0.1141038790345192,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0439300537109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0439300537109375,
"signal/advantage_abs_mean": 0.05022150054574013,
"signal/advantage_pre_scale_abs_mean": 0.05022150054574013,
"signal/advantage_pre_scale_std": 0.0995995968580246,
"signal/advantage_std": 0.0995995968580246,
"signal/brier_reward/centered_abs_mean": 0.10091617107391357,
"signal/brier_reward/group_std_mean": 0.1313829392194748,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012614521384239196,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012614521384239196,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020576045289635657,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026751379668712615,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002572005661204457,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002572005661204457,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017056349897757173,
"signal/frontier_aurc_reward/group_std_mean": 0.0027086624410003423,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.053086475119926e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.053086475119926e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12970556020736695,
"signal/frontier_coverage_1/group_std_mean": 0.1699573963880539,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023217292502522467,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023217292502522467,
"signal/frontier_coverage_10/centered_abs_mean": 0.12970556020736695,
"signal/frontier_coverage_10/group_std_mean": 0.1699573963880539,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023217292502522467,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023217292502522467,
"signal/frontier_coverage_15/centered_abs_mean": 0.1289975494146347,
"signal/frontier_coverage_15/group_std_mean": 0.16906480193138124,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002309055905789137,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002309055905789137,
"signal/frontier_coverage_20/centered_abs_mean": 0.09495823979377746,
"signal/frontier_coverage_20/group_std_mean": 0.1250176891684532,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016997524769976735,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016997524769976735,
"signal/frontier_coverage_25/centered_abs_mean": 0.06021154895424843,
"signal/frontier_coverage_25/group_std_mean": 0.07897393554449081,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001077786646783352,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001077786646783352,
"signal/frontier_coverage_5/centered_abs_mean": 0.12970556020736695,
"signal/frontier_coverage_5/group_std_mean": 0.1699573963880539,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023217292502522467,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023217292502522467,
"signal/frontier_ece_reward/centered_abs_mean": 0.004722311254590749,
"signal/frontier_ece_reward/group_std_mean": 0.006080184411257505,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005902889068238437,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005902889068238437,
"step": 270
},
{
"calibration/aurc": 0.3413525891312035,
"calibration/batch_distribution_entropy": 0.9247650910176233,
"calibration/buffer_distribution_entropy": 0.9369284539687136,
"calibration/confidence_entropy": 0.40555091128192194,
"calibration/coverage@0%": 0.012109375,
"calibration/coverage@1%": 0.012109375,
"calibration/coverage@10%": 0.067578125,
"calibration/coverage@15%": 0.103125,
"calibration/coverage@20%": 0.149609375,
"calibration/coverage@25%": 0.209765625,
"calibration/coverage@30%": 0.465234375,
"calibration/coverage@5%": 0.042578125,
"calibration/ece": 0.15222579926313382,
"calibration/mean_confidence": 0.5201320484059387,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 486.8,
"completions/max_terminated_length": 486.8,
"completions/mean_length": 209.0919921875,
"completions/mean_terminated_length": 209.0919921875,
"completions/min_length": 104.8,
"completions/min_terminated_length": 104.8,
"epoch": 0.88,
"grad_norm": 0.0009639724157750607,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 924539080.0,
"reward": 1.0366958379745483,
"reward_std": 0.06528096497058869,
"rewards/accuracy_reward": 0.60009765625,
"rewards/brier_reward": 0.8175089240074158,
"rewards/confidence_uniqueness_reward": 0.9496978759765625,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0024166646180674434,
"rewards/frontier_coverage_1": 0.1617402657866478,
"rewards/frontier_coverage_10": 0.1617402657866478,
"rewards/frontier_coverage_15": 0.15886529982089997,
"rewards/frontier_coverage_20": 0.11558039635419845,
"rewards/frontier_coverage_25": 0.0815199762582779,
"rewards/frontier_coverage_5": 0.1617402657866478,
"rewards/frontier_ece_reward": 0.0058576924726367,
"signal/accuracy_reward/centered_abs_mean": 0.088812255859375,
"signal/accuracy_reward/group_std_mean": 0.11949178874492646,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0444061279296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0444061279296875,
"signal/advantage_abs_mean": 0.049381940811872485,
"signal/advantage_pre_scale_abs_mean": 0.049381940811872485,
"signal/advantage_pre_scale_std": 0.0957550585269928,
"signal/advantage_std": 0.0957550585269928,
"signal/brier_reward/centered_abs_mean": 0.1052006021142006,
"signal/brier_reward/group_std_mean": 0.1361723154783249,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013150075264275075,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013150075264275075,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022104668617248534,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02765066474676132,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002763083577156067,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002763083577156067,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019552123732864855,
"signal/frontier_aurc_reward/group_std_mean": 0.0030503868591040375,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.49983005435206e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.49983005435206e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14229839742183686,
"signal/frontier_coverage_1/group_std_mean": 0.18508260250091552,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002547141211107373,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002547141211107373,
"signal/frontier_coverage_10/centered_abs_mean": 0.14229839742183686,
"signal/frontier_coverage_10/group_std_mean": 0.18508260250091552,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002547141211107373,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002547141211107373,
"signal/frontier_coverage_15/centered_abs_mean": 0.1387757331132889,
"signal/frontier_coverage_15/group_std_mean": 0.18052529096603392,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024840855039656162,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024840855039656162,
"signal/frontier_coverage_20/centered_abs_mean": 0.10256319344043732,
"signal/frontier_coverage_20/group_std_mean": 0.13410095870494843,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018358811037614941,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018358811037614941,
"signal/frontier_coverage_25/centered_abs_mean": 0.0661829337477684,
"signal/frontier_coverage_25/group_std_mean": 0.08609075546264648,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001184674515388906,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001184674515388906,
"signal/frontier_coverage_5/centered_abs_mean": 0.14229839742183686,
"signal/frontier_coverage_5/group_std_mean": 0.18508260250091552,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002547141211107373,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002547141211107373,
"signal/frontier_ece_reward/centered_abs_mean": 0.004747295938432217,
"signal/frontier_ece_reward/group_std_mean": 0.00602139700204134,
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005934119923040271,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005934119923040271,
"step": 275
},
{
"calibration/aurc": 0.36725993407894925,
"calibration/batch_distribution_entropy": 0.8933985734348454,
"calibration/buffer_distribution_entropy": 0.9352886578588653,
"calibration/confidence_entropy": 0.39277611637945686,
"calibration/coverage@0%": 0.009765625,
"calibration/coverage@1%": 0.009765625,
"calibration/coverage@10%": 0.0765625,
"calibration/coverage@15%": 0.11875,
"calibration/coverage@20%": 0.169140625,
"calibration/coverage@25%": 0.338671875,
"calibration/coverage@30%": 0.43984375,
"calibration/coverage@5%": 0.01328125,
"calibration/ece": 0.19819743553958977,
"calibration/mean_confidence": 0.5616876995536956,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 521.4,
"completions/max_terminated_length": 521.4,
"completions/mean_length": 218.483984375,
"completions/mean_terminated_length": 218.483984375,
"completions/min_length": 105.6,
"completions/min_terminated_length": 105.6,
"epoch": 0.896,
"grad_norm": 0.0013427204685285687,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 941887204.0,
"reward": 1.034142303466797,
"reward_std": 0.061747805774211885,
"rewards/accuracy_reward": 0.596484375,
"rewards/brier_reward": 0.8217704772949219,
"rewards/confidence_uniqueness_reward": 0.9495529174804688,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002957669971510768,
"rewards/frontier_coverage_1": 0.14804764688014985,
"rewards/frontier_coverage_10": 0.14804764688014985,
"rewards/frontier_coverage_15": 0.14638043344020843,
"rewards/frontier_coverage_20": 0.1089130237698555,
"rewards/frontier_coverage_25": 0.07781351059675216,
"rewards/frontier_coverage_5": 0.14804764688014985,
"rewards/frontier_ece_reward": 0.004998845653608442,
"signal/accuracy_reward/centered_abs_mean": 0.07838134765625,
"signal/accuracy_reward/group_std_mean": 0.10633349418640137,
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039190673828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039190673828125,
"signal/advantage_abs_mean": 0.04575110375881195,
"signal/advantage_pre_scale_abs_mean": 0.04575110375881195,
"signal/advantage_pre_scale_std": 0.0925728052854538,
"signal/advantage_std": 0.0925728052854538,
"signal/brier_reward/centered_abs_mean": 0.09987995773553848,
"signal/brier_reward/group_std_mean": 0.13066966235637664,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01248499471694231,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01248499471694231,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021440339088439942,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02688387930393219,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026800423860549928,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026800423860549928,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002111524622887373,
"signal/frontier_aurc_reward/group_std_mean": 0.0032168001867830752,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7796289325342515e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7796289325342515e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13056060820817947,
"signal/frontier_coverage_1/group_std_mean": 0.17031558454036713,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023370349314063787,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023370349314063787,
"signal/frontier_coverage_10/centered_abs_mean": 0.13056060820817947,
"signal/frontier_coverage_10/group_std_mean": 0.17031558454036713,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023370349314063787,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023370349314063787,
"signal/frontier_coverage_15/centered_abs_mean": 0.12676671743392945,
"signal/frontier_coverage_15/group_std_mean": 0.16541725099086763,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022691241931170223,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022691241931170223,
"signal/frontier_coverage_20/centered_abs_mean": 0.09470010697841644,
"signal/frontier_coverage_20/group_std_mean": 0.12389432638883591,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016951319063082337,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016951319063082337,
"signal/frontier_coverage_25/centered_abs_mean": 0.06315547078847886,
"signal/frontier_coverage_25/group_std_mean": 0.08203252255916596,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011304829269647599,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011304829269647599,
"signal/frontier_coverage_5/centered_abs_mean": 0.13056060820817947,
"signal/frontier_coverage_5/group_std_mean": 0.17031558454036713,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023370349314063787,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023370349314063787,
"signal/frontier_ece_reward/centered_abs_mean": 0.0044867975637316706,
"signal/frontier_ece_reward/group_std_mean": 0.005752355605363846,
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005608496954664588,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005608496954664588,
"step": 280
},
{
"calibration/aurc": 0.35689277553870924,
"calibration/batch_distribution_entropy": 0.9155435810447115,
"calibration/buffer_distribution_entropy": 0.9320674021748564,
"calibration/confidence_entropy": 0.4032934144385215,
"calibration/coverage@0%": 0.016033206947162426,
"calibration/coverage@1%": 0.016033206947162426,
"calibration/coverage@10%": 0.0832466976516634,
"calibration/coverage@15%": 0.1575090203033268,
"calibration/coverage@20%": 0.33780271526418787,
"calibration/coverage@25%": 0.3948683647260274,
"calibration/coverage@30%": 0.4992042257827789,
"calibration/coverage@5%": 0.021507307974559686,
"calibration/ece": 0.16610141089467823,
"calibration/mean_confidence": 0.5162139753059,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 845.8,
"completions/max_terminated_length": 672.4,
"completions/mean_length": 230.59306640625,
"completions/mean_terminated_length": 230.3378448486328,
"completions/min_length": 111.2,
"completions/min_terminated_length": 111.2,
"epoch": 0.912,
"grad_norm": 0.0006971288821659982,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 959299773.0,
"reward": 1.0343021631240845,
"reward_std": 0.06137159615755081,
"rewards/accuracy_reward": 0.59765625,
"rewards/brier_reward": 0.8208606958389282,
"rewards/confidence_uniqueness_reward": 0.9507390022277832,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002479233802296221,
"rewards/frontier_coverage_1": 0.1423247776925564,
"rewards/frontier_coverage_10": 0.1423247776925564,
"rewards/frontier_coverage_15": 0.13940538018941878,
"rewards/frontier_coverage_20": 0.10835960805416107,
"rewards/frontier_coverage_25": 0.07879922837018967,
"rewards/frontier_coverage_5": 0.1423247776925564,
"rewards/frontier_ece_reward": 0.005422212090343237,
"signal/accuracy_reward/centered_abs_mean": 0.0740234375,
"signal/accuracy_reward/group_std_mean": 0.10433268696069717,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03701171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03701171875,
"signal/advantage_abs_mean": 0.04508618414402008,
"signal/advantage_pre_scale_abs_mean": 0.04508618414402008,
"signal/advantage_pre_scale_std": 0.08746702373027801,
"signal/advantage_std": 0.08746702373027801,
"signal/brier_reward/centered_abs_mean": 0.11010385453701019,
"signal/brier_reward/group_std_mean": 0.1430896759033203,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013762981817126273,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013762981817126273,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022147323563694955,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029007868096232414,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027684154454618694,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027684154454618694,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001991865341551602,
"signal/frontier_aurc_reward/group_std_mean": 0.0031465083360672,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.565438746591099e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.565438746591099e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15080978870391845,
"signal/frontier_coverage_1/group_std_mean": 0.19573720097541808,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002699495013803244,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002699495013803244,
"signal/frontier_coverage_10/centered_abs_mean": 0.15080978870391845,
"signal/frontier_coverage_10/group_std_mean": 0.19573720097541808,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002699495013803244,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002699495013803244,
"signal/frontier_coverage_15/centered_abs_mean": 0.14642856270074844,
"signal/frontier_coverage_15/group_std_mean": 0.19005082845687865,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026210711803287268,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026210711803287268,
"signal/frontier_coverage_20/centered_abs_mean": 0.11272255331277847,
"signal/frontier_coverage_20/group_std_mean": 0.14563319385051726,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020177337806671857,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020177337806671857,
"signal/frontier_coverage_25/centered_abs_mean": 0.07451429963111877,
"signal/frontier_coverage_25/group_std_mean": 0.0956909030675888,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013338059186935424,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013338059186935424,
"signal/frontier_coverage_5/centered_abs_mean": 0.15080978870391845,
"signal/frontier_coverage_5/group_std_mean": 0.19573720097541808,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002699495013803244,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002699495013803244,
"signal/frontier_ece_reward/centered_abs_mean": 0.004899371787905693,
"signal/frontier_ece_reward/group_std_mean": 0.006204710435122251,
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006124214734882117,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006124214734882117,
"step": 285
},
{
"calibration/aurc": 0.4121583044665427,
"calibration/batch_distribution_entropy": 0.9241065674179074,
"calibration/buffer_distribution_entropy": 0.9318724189860458,
"calibration/confidence_entropy": 0.4084272631109712,
"calibration/coverage@0%": 0.009375,
"calibration/coverage@1%": 0.009375,
"calibration/coverage@10%": 0.01640625,
"calibration/coverage@15%": 0.01640625,
"calibration/coverage@20%": 0.022265625,
"calibration/coverage@25%": 0.09375,
"calibration/coverage@30%": 0.271875,
"calibration/coverage@5%": 0.009375,
"calibration/ece": 0.1893808706693447,
"calibration/mean_confidence": 0.5403191218712677,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 527.6,
"completions/max_terminated_length": 527.6,
"completions/mean_length": 233.3056640625,
"completions/mean_terminated_length": 233.3056640625,
"completions/min_length": 117.2,
"completions/min_terminated_length": 117.2,
"epoch": 0.928,
"grad_norm": 0.0006464759935624897,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 976715639.0,
"reward": 1.0367928266525268,
"reward_std": 0.06266704574227333,
"rewards/accuracy_reward": 0.6103515625,
"rewards/brier_reward": 0.8035065650939941,
"rewards/confidence_uniqueness_reward": 0.9388595581054687,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.003317755740135908,
"rewards/frontier_coverage_1": 0.13958473801612853,
"rewards/frontier_coverage_10": 0.13958473801612853,
"rewards/frontier_coverage_15": 0.13682184219360352,
"rewards/frontier_coverage_20": 0.1055952787399292,
"rewards/frontier_coverage_25": 0.07915615886449814,
"rewards/frontier_coverage_5": 0.13958473801612853,
"rewards/frontier_ece_reward": 0.005030411807820201,
"signal/accuracy_reward/centered_abs_mean": 0.07967529296875,
"signal/accuracy_reward/group_std_mean": 0.1087621882557869,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039837646484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039837646484375,
"signal/advantage_abs_mean": 0.04707158431410789,
"signal/advantage_pre_scale_abs_mean": 0.04707158431410789,
"signal/advantage_pre_scale_std": 0.09145613610744477,
"signal/advantage_std": 0.09145613610744477,
"signal/brier_reward/centered_abs_mean": 0.11411072015762329,
"signal/brier_reward/group_std_mean": 0.1472643107175827,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014263840019702911,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014263840019702911,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029550457000732423,
"signal/confidence_uniqueness_reward/group_std_mean": 0.037665216624736785,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003693807125091553,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003693807125091553,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029917174484580754,
"signal/frontier_aurc_reward/group_std_mean": 0.004943959508091211,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3551741439150645e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3551741439150645e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14711553156375884,
"signal/frontier_coverage_1/group_std_mean": 0.19341881871223449,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026333680376410483,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026333680376410483,
"signal/frontier_coverage_10/centered_abs_mean": 0.14711553156375884,
"signal/frontier_coverage_10/group_std_mean": 0.19341881871223449,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026333680376410483,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026333680376410483,
"signal/frontier_coverage_15/centered_abs_mean": 0.14292107820510863,
"signal/frontier_coverage_15/group_std_mean": 0.1878840833902359,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025582872331142426,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025582872331142426,
"signal/frontier_coverage_20/centered_abs_mean": 0.10920373499393463,
"signal/frontier_coverage_20/group_std_mean": 0.14428247809410094,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019547467585653068,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019547467585653068,
"signal/frontier_coverage_25/centered_abs_mean": 0.07394303530454635,
"signal/frontier_coverage_25/group_std_mean": 0.09636994302272797,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001323580276221037,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001323580276221037,
"signal/frontier_coverage_5/centered_abs_mean": 0.14711553156375884,
"signal/frontier_coverage_5/group_std_mean": 0.19341881871223449,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026333680376410483,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026333680376410483,
"signal/frontier_ece_reward/centered_abs_mean": 0.005132979806512594,
"signal/frontier_ece_reward/group_std_mean": 0.00646492512896657,
"signal/frontier_ece_reward/group_zero_std_frac": 0.040625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006416224758140742,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006416224758140742,
"step": 290
},
{
"calibration/aurc": 0.2322381487413944,
"calibration/batch_distribution_entropy": 0.9131056212873986,
"calibration/buffer_distribution_entropy": 0.9310349150148444,
"calibration/confidence_entropy": 0.3937384896123201,
"calibration/coverage@0%": 0.057421875,
"calibration/coverage@1%": 0.057421875,
"calibration/coverage@10%": 0.2296875,
"calibration/coverage@15%": 0.35546875,
"calibration/coverage@20%": 0.47578125,
"calibration/coverage@25%": 0.57265625,
"calibration/coverage@30%": 0.6640625,
"calibration/coverage@5%": 0.1171875,
"calibration/ece": 0.11506757812499999,
"calibration/mean_confidence": 0.550976953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 759.8,
"completions/max_terminated_length": 550.8,
"completions/mean_length": 242.756640625,
"completions/mean_terminated_length": 242.6308380126953,
"completions/min_length": 120.4,
"completions/min_terminated_length": 120.4,
"epoch": 0.944,
"grad_norm": 0.000754023902118206,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 994176891.0,
"reward": 1.039714527130127,
"reward_std": 0.07349804490804672,
"rewards/accuracy_reward": 0.609765625,
"rewards/brier_reward": 0.8154372692108154,
"rewards/confidence_uniqueness_reward": 0.9433710098266601,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.003159674210473895,
"rewards/frontier_coverage_1": 0.15151877403259278,
"rewards/frontier_coverage_10": 0.15151877403259278,
"rewards/frontier_coverage_15": 0.14611534178256988,
"rewards/frontier_coverage_20": 0.11554400622844696,
"rewards/frontier_coverage_25": 0.08363842219114304,
"rewards/frontier_coverage_5": 0.15151877403259278,
"rewards/frontier_ece_reward": 0.006149538699537516,
"signal/accuracy_reward/centered_abs_mean": 0.105615234375,
"signal/accuracy_reward/group_std_mean": 0.13928218185901642,
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0528076171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0528076171875,
"signal/advantage_abs_mean": 0.05543512031435967,
"signal/advantage_pre_scale_abs_mean": 0.05543512031435967,
"signal/advantage_pre_scale_std": 0.1069907784461975,
"signal/advantage_std": 0.1069907784461975,
"signal/brier_reward/centered_abs_mean": 0.11077233403921127,
"signal/brier_reward/group_std_mean": 0.14170551002025605,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01384654175490141,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01384654175490141,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026658696308732032,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03480346091091633,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003332337038591504,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003332337038591504,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002645864570513368,
"signal/frontier_aurc_reward/group_std_mean": 0.004210462234914303,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7360976168420166e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7360976168420166e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.146555095911026,
"signal/frontier_coverage_1/group_std_mean": 0.18947678804397583,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026233360171318055,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026233360171318055,
"signal/frontier_coverage_10/centered_abs_mean": 0.146555095911026,
"signal/frontier_coverage_10/group_std_mean": 0.18947678804397583,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026233360171318055,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026233360171318055,
"signal/frontier_coverage_15/centered_abs_mean": 0.14236142635345458,
"signal/frontier_coverage_15/group_std_mean": 0.18402716219425203,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025482695084065197,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025482695084065197,
"signal/frontier_coverage_20/centered_abs_mean": 0.10681335628032684,
"signal/frontier_coverage_20/group_std_mean": 0.13840672373771667,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019119590055197478,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019119590055197478,
"signal/frontier_coverage_25/centered_abs_mean": 0.07202518582344056,
"signal/frontier_coverage_25/group_std_mean": 0.09276955872774124,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001289250748232007,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001289250748232007,
"signal/frontier_coverage_5/centered_abs_mean": 0.146555095911026,
"signal/frontier_coverage_5/group_std_mean": 0.18947678804397583,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026233360171318055,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026233360171318055,
"signal/frontier_ece_reward/centered_abs_mean": 0.0051291721872985365,
"signal/frontier_ece_reward/group_std_mean": 0.006471920944750309,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006411465234123171,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006411465234123171,
"step": 295
},
{
"calibration/aurc": 0.33955971949694363,
"calibration/batch_distribution_entropy": 0.8857733162872842,
"calibration/buffer_distribution_entropy": 0.9298230128935456,
"calibration/confidence_entropy": 0.3718501892191845,
"calibration/coverage@0%": 0.00703125,
"calibration/coverage@1%": 0.00703125,
"calibration/coverage@10%": 0.02578125,
"calibration/coverage@15%": 0.146875,
"calibration/coverage@20%": 0.31640625,
"calibration/coverage@25%": 0.36875,
"calibration/coverage@30%": 0.405859375,
"calibration/coverage@5%": 0.00703125,
"calibration/ece": 0.1850365021007531,
"calibration/mean_confidence": 0.5966286162392834,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 773.0,
"completions/max_terminated_length": 586.2,
"completions/mean_length": 245.58359375,
"completions/mean_terminated_length": 245.33098449707032,
"completions/min_length": 121.4,
"completions/min_terminated_length": 121.4,
"epoch": 0.96,
"grad_norm": 0.0005919244140386581,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 1011631987.0,
"reward": 1.0362752437591554,
"reward_std": 0.06122729256749153,
"rewards/accuracy_reward": 0.59599609375,
"rewards/brier_reward": 0.829588258266449,
"rewards/confidence_uniqueness_reward": 0.9436734795570374,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0040486148092895744,
"rewards/frontier_coverage_1": 0.17092968821525573,
"rewards/frontier_coverage_10": 0.17092968821525573,
"rewards/frontier_coverage_15": 0.16497417092323302,
"rewards/frontier_coverage_20": 0.12459131479263305,
"rewards/frontier_coverage_25": 0.08991340845823288,
"rewards/frontier_coverage_5": 0.17092968821525573,
"rewards/frontier_ece_reward": 0.006544246431440115,
"signal/accuracy_reward/centered_abs_mean": 0.076776123046875,
"signal/accuracy_reward/group_std_mean": 0.10603935718536377,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0383880615234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0383880615234375,
"signal/advantage_abs_mean": 0.04462712332606315,
"signal/advantage_pre_scale_abs_mean": 0.04462712332606315,
"signal/advantage_pre_scale_std": 0.09253625869750977,
"signal/advantage_std": 0.09253625869750977,
"signal/brier_reward/centered_abs_mean": 0.09673822671175003,
"signal/brier_reward/group_std_mean": 0.12821625024080277,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012092278338968754,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012092278338968754,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025702812895178794,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03373255953192711,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032128516118973493,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032128516118973493,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003233627835288644,
"signal/frontier_aurc_reward/group_std_mean": 0.004815721325576306,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7881935936165975e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7881935936165975e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12415488362312317,
"signal/frontier_coverage_1/group_std_mean": 0.16360531747341156,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022223723120987416,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022223723120987416,
"signal/frontier_coverage_10/centered_abs_mean": 0.12415488362312317,
"signal/frontier_coverage_10/group_std_mean": 0.16360531747341156,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022223723120987416,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022223723120987416,
"signal/frontier_coverage_15/centered_abs_mean": 0.1208252727985382,
"signal/frontier_coverage_15/group_std_mean": 0.1590008407831192,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002162772277370095,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002162772277370095,
"signal/frontier_coverage_20/centered_abs_mean": 0.0897542342543602,
"signal/frontier_coverage_20/group_std_mean": 0.11785899251699447,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016066007083281875,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016066007083281875,
"signal/frontier_coverage_25/centered_abs_mean": 0.0641759216785431,
"signal/frontier_coverage_25/group_std_mean": 0.08313264548778534,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011487489799037575,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011487489799037575,
"signal/frontier_coverage_5/centered_abs_mean": 0.12415488362312317,
"signal/frontier_coverage_5/group_std_mean": 0.16360531747341156,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022223723120987416,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022223723120987416,
"signal/frontier_ece_reward/centered_abs_mean": 0.004897785000503063,
"signal/frontier_ece_reward/group_std_mean": 0.0062327212654054165,
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006122231250628829,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006122231250628829,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.46128748795665087,
"eval_calibration/batch_distribution_entropy": 0.8756256531737828,
"eval_calibration/buffer_distribution_entropy": 0.9280413379460178,
"eval_calibration/confidence_entropy": 0.41193622937777086,
"eval_calibration/coverage@0%": 0.0703125,
"eval_calibration/coverage@1%": 0.0703125,
"eval_calibration/coverage@10%": 0.0703125,
"eval_calibration/coverage@15%": 0.1171875,
"eval_calibration/coverage@20%": 0.140625,
"eval_calibration/coverage@25%": 0.1953125,
"eval_calibration/coverage@30%": 0.2109375,
"eval_calibration/coverage@5%": 0.0703125,
"eval_calibration/ece": 0.21648227695323158,
"eval_calibration/mean_confidence": 0.5482010269532316,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 456.75,
"eval_completions/max_terminated_length": 456.75,
"eval_completions/mean_length": 251.32495880126953,
"eval_completions/mean_terminated_length": 251.32495880126953,
"eval_completions/min_length": 142.0,
"eval_completions/min_terminated_length": 142.0,
"eval_loss": 0.0,
"eval_num_tokens": 1011631987.0,
"eval_reward": 0.9415136426687241,
"eval_reward_std": 0.23694248497486115,
"eval_rewards/accuracy_reward": 0.421875,
"eval_rewards/brier_reward": 0.7852368354797363,
"eval_rewards/confidence_uniqueness_reward": 0.895263671875,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0037054395070299506,
"eval_rewards/frontier_coverage_1": 0.21414391696453094,
"eval_rewards/frontier_coverage_10": 0.21414391696453094,
"eval_rewards/frontier_coverage_15": 0.2132040672004223,
"eval_rewards/frontier_coverage_20": 0.1507317405194044,
"eval_rewards/frontier_coverage_25": 0.10258364118635654,
"eval_rewards/frontier_coverage_5": 0.21414391696453094,
"eval_rewards/frontier_ece_reward": 0.005837466917000711,
"eval_runtime": 22.9077,
"eval_samples_per_second": 21.827,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4697265625,
"eval_signal/accuracy_reward/group_std_mean": 0.4920375719666481,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23486328125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23486328125,
"eval_signal/advantage_abs_mean": 0.21758165583014488,
"eval_signal/advantage_pre_scale_abs_mean": 0.21758165583014488,
"eval_signal/advantage_pre_scale_std": 0.23440348356962204,
"eval_signal/advantage_std": 0.23440348356962204,
"eval_signal/brier_reward/centered_abs_mean": 0.23594782128930092,
"eval_signal/brier_reward/group_std_mean": 0.2868555709719658,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029493477661162615,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.029493477661162615,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0440826416015625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.053775970824062824,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055103302001953125,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055103302001953125,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004771354433614761,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009579721372574568,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.540724047634285e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.540724047634285e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3561520427465439,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4406690001487732,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006375121418386698,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006375121418386698,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3561520427465439,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4406690001487732,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006375121418386698,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006375121418386698,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3545154333114624,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4385922998189926,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006345825968310237,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006345825968310237,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.24524880945682526,
"eval_signal/frontier_coverage_20/group_std_mean": 0.30554553121328354,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0043899534502997994,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0043899534502997994,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.14788980782032013,
"eval_signal/frontier_coverage_25/group_std_mean": 0.18959416821599007,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026472274912521243,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026472274912521243,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3561520427465439,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4406690001487732,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006375121418386698,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006375121418386698,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.009522880194708705,
"eval_signal/frontier_ece_reward/group_std_mean": 0.011444894364103675,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011903600243385881,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011903600243385881,
"eval_steps_per_second": 0.175,
"step": 300
},
{
"calibration/aurc": 0.2459965736429119,
"calibration/batch_distribution_entropy": 0.9147780574307252,
"calibration/buffer_distribution_entropy": 0.9281321077000616,
"calibration/confidence_entropy": 0.4052235295442716,
"calibration/coverage@0%": 0.029296875,
"calibration/coverage@1%": 0.029296875,
"calibration/coverage@10%": 0.279296875,
"calibration/coverage@15%": 0.39609375,
"calibration/coverage@20%": 0.5109375,
"calibration/coverage@25%": 0.56796875,
"calibration/coverage@30%": 0.62109375,
"calibration/coverage@5%": 0.07265625,
"calibration/ece": 0.12373667296527428,
"calibration/mean_confidence": 0.5660033672845597,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 1153.4,
"completions/max_terminated_length": 594.8,
"completions/mean_length": 248.48974609375,
"completions/mean_terminated_length": 247.73369445800782,
"completions/min_length": 120.4,
"completions/min_terminated_length": 120.4,
"epoch": 0.976,
"grad_norm": 0.000649201450869441,
"learning_rate": 1e-06,
"loss": 0.0016,
"num_tokens": 1029037642.0,
"reward": 1.0403510093688966,
"reward_std": 0.061715726554393766,
"rewards/accuracy_reward": 0.61328125,
"rewards/brier_reward": 0.819303834438324,
"rewards/confidence_uniqueness_reward": 0.9493087649345398,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.002960939984768629,
"rewards/frontier_coverage_1": 0.12880419678986071,
"rewards/frontier_coverage_10": 0.12880419678986071,
"rewards/frontier_coverage_15": 0.12648025006055832,
"rewards/frontier_coverage_20": 0.10162520408630371,
"rewards/frontier_coverage_25": 0.07422572486102581,
"rewards/frontier_coverage_5": 0.12880419678986071,
"rewards/frontier_ece_reward": 0.005600927863270044,
"signal/accuracy_reward/centered_abs_mean": 0.07640380859375,
"signal/accuracy_reward/group_std_mean": 0.106211369484663,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.038201904296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.038201904296875,
"signal/advantage_abs_mean": 0.04466826543211937,
"signal/advantage_pre_scale_abs_mean": 0.04466826543211937,
"signal/advantage_pre_scale_std": 0.09117967635393143,
"signal/advantage_std": 0.09117967635393143,
"signal/brier_reward/centered_abs_mean": 0.09978740066289901,
"signal/brier_reward/group_std_mean": 0.13079679906368255,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012473425082862376,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012473425082862376,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023243288323283196,
"signal/confidence_uniqueness_reward/group_std_mean": 0.031104259938001633,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029054110404103995,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029054110404103995,
"signal/format_reward/centered_abs_mean": 0.001275634765625,
"signal/format_reward/group_std_mean": 0.002798851951956749,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006378173828125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006378173828125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002286581532098353,
"signal/frontier_aurc_reward/group_std_mean": 0.003588835708796978,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0929808164946735e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0929808164946735e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13535113930702208,
"signal/frontier_coverage_1/group_std_mean": 0.1770560622215271,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024227853398770096,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024227853398770096,
"signal/frontier_coverage_10/centered_abs_mean": 0.13535113930702208,
"signal/frontier_coverage_10/group_std_mean": 0.1770560622215271,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024227853398770096,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024227853398770096,
"signal/frontier_coverage_15/centered_abs_mean": 0.13063560128211976,
"signal/frontier_coverage_15/group_std_mean": 0.17087839543819427,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023383772233501075,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023383772233501075,
"signal/frontier_coverage_20/centered_abs_mean": 0.09394551813602448,
"signal/frontier_coverage_20/group_std_mean": 0.12267908304929734,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016816247487440705,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016816247487440705,
"signal/frontier_coverage_25/centered_abs_mean": 0.06524330824613571,
"signal/frontier_coverage_25/group_std_mean": 0.08443802744150161,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011678551556542516,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011678551556542516,
"signal/frontier_coverage_5/centered_abs_mean": 0.13535113930702208,
"signal/frontier_coverage_5/group_std_mean": 0.1770560622215271,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024227853398770096,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024227853398770096,
"signal/frontier_ece_reward/centered_abs_mean": 0.004773552063852549,
"signal/frontier_ece_reward/group_std_mean": 0.006137214787304402,
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005966940079815686,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005966940079815686,
"step": 305
},
{
"calibration/aurc": 0.3583176928401651,
"calibration/batch_distribution_entropy": 0.9294162782931643,
"calibration/buffer_distribution_entropy": 0.9286617297056032,
"calibration/confidence_entropy": 0.40752941939728976,
"calibration/coverage@0%": 0.01484375,
"calibration/coverage@1%": 0.01484375,
"calibration/coverage@10%": 0.05625,
"calibration/coverage@15%": 0.107421875,
"calibration/coverage@20%": 0.136328125,
"calibration/coverage@25%": 0.28046875,
"calibration/coverage@30%": 0.403515625,
"calibration/coverage@5%": 0.01875,
"calibration/ece": 0.16133625666639104,
"calibration/mean_confidence": 0.49935197729238023,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 571.6,
"completions/max_terminated_length": 571.6,
"completions/mean_length": 241.47685546875,
"completions/mean_terminated_length": 241.47685546875,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.992,
"grad_norm": 0.0007859015022404492,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 1046638845.0,
"reward": 1.0216437101364135,
"reward_std": 0.06261338591575623,
"rewards/accuracy_reward": 0.573828125,
"rewards/brier_reward": 0.8116239428520202,
"rewards/confidence_uniqueness_reward": 0.9438789367675782,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0030248827766627072,
"rewards/frontier_coverage_1": 0.1581905961036682,
"rewards/frontier_coverage_10": 0.1581905961036682,
"rewards/frontier_coverage_15": 0.15104590654373168,
"rewards/frontier_coverage_20": 0.1126480221748352,
"rewards/frontier_coverage_25": 0.08232222348451615,
"rewards/frontier_coverage_5": 0.1581905961036682,
"rewards/frontier_ece_reward": 0.00525932852178812,
"signal/accuracy_reward/centered_abs_mean": 0.0842041015625,
"signal/accuracy_reward/group_std_mean": 0.11003593057394027,
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04210205078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04210205078125,
"signal/advantage_abs_mean": 0.04805062413215637,
"signal/advantage_pre_scale_abs_mean": 0.04805062413215637,
"signal/advantage_pre_scale_std": 0.09543496072292328,
"signal/advantage_std": 0.09543496072292328,
"signal/brier_reward/centered_abs_mean": 0.10354410707950593,
"signal/brier_reward/group_std_mean": 0.13237534910440446,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01294301338493824,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01294301338493824,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024483132362365722,
"signal/confidence_uniqueness_reward/group_std_mean": 0.031796820089221,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030603915452957152,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030603915452957152,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002486141538247466,
"signal/frontier_aurc_reward/group_std_mean": 0.004122919822111726,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4501933734864e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4501933734864e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1334092989563942,
"signal/frontier_coverage_1/group_std_mean": 0.17288561463356017,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00238802635576576,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00238802635576576,
"signal/frontier_coverage_10/centered_abs_mean": 0.1334092989563942,
"signal/frontier_coverage_10/group_std_mean": 0.17288561463356017,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00238802635576576,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00238802635576576,
"signal/frontier_coverage_15/centered_abs_mean": 0.1263820171356201,
"signal/frontier_coverage_15/group_std_mean": 0.16385815739631654,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022622381802648306,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022622381802648306,
"signal/frontier_coverage_20/centered_abs_mean": 0.08821254223585129,
"signal/frontier_coverage_20/group_std_mean": 0.11483215242624283,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015790044097229838,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015790044097229838,
"signal/frontier_coverage_25/centered_abs_mean": 0.06242813915014267,
"signal/frontier_coverage_25/group_std_mean": 0.08045191913843155,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001117463654372841,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001117463654372841,
"signal/frontier_coverage_5/centered_abs_mean": 0.1334092989563942,
"signal/frontier_coverage_5/group_std_mean": 0.17288561463356017,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00238802635576576,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00238802635576576,
"signal/frontier_ece_reward/centered_abs_mean": 0.00461451355367899,
"signal/frontier_ece_reward/group_std_mean": 0.005895448848605156,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005768141942098737,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005768141942098737,
"step": 310
},
{
"calibration/aurc": 0.27146581736436004,
"calibration/batch_distribution_entropy": 0.8746381090032178,
"calibration/buffer_distribution_entropy": 0.9301393590851372,
"calibration/confidence_entropy": 0.3797766950336877,
"calibration/coverage@0%": 0.0107421875,
"calibration/coverage@1%": 0.0107421875,
"calibration/coverage@10%": 0.1044921875,
"calibration/coverage@15%": 0.12890625,
"calibration/coverage@20%": 0.171875,
"calibration/coverage@25%": 0.4501953125,
"calibration/coverage@30%": 0.67578125,
"calibration/coverage@5%": 0.0693359375,
"calibration/ece": 0.17222164030985487,
"calibration/mean_confidence": 0.6362255465598549,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 524.5,
"completions/max_terminated_length": 524.5,
"completions/mean_length": 244.13977813720703,
"completions/mean_terminated_length": 244.13977813720703,
"completions/min_length": 123.5,
"completions/min_terminated_length": 123.5,
"epoch": 0.9984,
"num_tokens": 1053636222.0,
"reward": 1.0224891901016235,
"reward_std": 0.06762239336967468,
"rewards/accuracy_reward": 0.590087890625,
"rewards/brier_reward": 0.7884363532066345,
"rewards/confidence_uniqueness_reward": 0.9537101686000824,
"rewards/format_reward": 0.999755859375,
"rewards/frontier_aurc_reward": -0.002803257084451616,
"rewards/frontier_coverage_1": 0.10104693099856377,
"rewards/frontier_coverage_10": 0.10104693099856377,
"rewards/frontier_coverage_15": 0.09339457005262375,
"rewards/frontier_coverage_20": 0.06919080764055252,
"rewards/frontier_coverage_25": 0.054883923381567,
"rewards/frontier_coverage_5": 0.10104693099856377,
"rewards/frontier_ece_reward": 0.004242375260218978,
"signal/accuracy_reward/centered_abs_mean": 0.0766448974609375,
"signal/accuracy_reward/group_std_mean": 0.10971884056925774,
"signal/accuracy_reward/group_zero_std_frac": 0.6484375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03832244873046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03832244873046875,
"signal/advantage_abs_mean": 0.04935746267437935,
"signal/advantage_pre_scale_abs_mean": 0.04935746267437935,
"signal/advantage_pre_scale_std": 0.0978938564658165,
"signal/advantage_std": 0.0978938564658165,
"signal/brier_reward/centered_abs_mean": 0.10980122536420822,
"signal/brier_reward/group_std_mean": 0.14020781219005585,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013725153170526028,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013725153170526028,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0197527464479208,
"signal/confidence_uniqueness_reward/group_std_mean": 0.026468923315405846,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024690933059901,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024690933059901,
"signal/format_reward/centered_abs_mean": 0.0004730224609375,
"signal/format_reward/group_std_mean": 0.0013810679083690047,
"signal/format_reward/group_zero_std_frac": 0.9921875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023492295295000076,
"signal/frontier_aurc_reward/group_std_mean": 0.003797045210376382,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.205120785627514e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.205120785627514e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1279282197356224,
"signal/frontier_coverage_1/group_std_mean": 0.16734013706445694,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022899151081219316,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022899151081219316,
"signal/frontier_coverage_10/centered_abs_mean": 0.1279282197356224,
"signal/frontier_coverage_10/group_std_mean": 0.16734013706445694,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022899151081219316,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022899151081219316,
"signal/frontier_coverage_15/centered_abs_mean": 0.1206963062286377,
"signal/frontier_coverage_15/group_std_mean": 0.15781164169311523,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021604637149721384,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021604637149721384,
"signal/frontier_coverage_20/centered_abs_mean": 0.08331097289919853,
"signal/frontier_coverage_20/group_std_mean": 0.10951121896505356,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014912663027644157,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014912663027644157,
"signal/frontier_coverage_25/centered_abs_mean": 0.057270800694823265,
"signal/frontier_coverage_25/group_std_mean": 0.07496540248394012,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001025147212203592,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001025147212203592,
"signal/frontier_coverage_5/centered_abs_mean": 0.1279282197356224,
"signal/frontier_coverage_5/group_std_mean": 0.16734013706445694,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022899151081219316,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022899151081219316,
"signal/frontier_ece_reward/centered_abs_mean": 0.004676059354096651,
"signal/frontier_ece_reward/group_std_mean": 0.006006488110870123,
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005845074192620814,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005845074192620814,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.004934354589493477,
"train_runtime": 58560.1613,
"train_samples_per_second": 0.342,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1053636222,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}